60 #include "llvm/ADT/Optional.h" 61 #include "llvm/Support/Format.h" 66 constexpr
int FuzzyMatcher::MaxPat;
67 constexpr
int FuzzyMatcher::MaxWord;
// Converts an ASCII uppercase letter ('A'..'Z') to its lowercase form.
// Any other character is handed back untouched. The conversion is done with
// plain arithmetic, so it does not consult the current locale.
static char lower(char C) {
  const bool IsAsciiUpper = 'A' <= C && C <= 'Z';
  if (!IsAsciiUpper)
    return C;
  // Distance between the upper- and lowercase ranges in ASCII.
  return C + ('a' - 'A');
}
74 static bool isAwful(
int S) {
return S < AwfulScore / 2; }
78 : PatN(std::min<int>(MaxPat, Pattern.size())),
79 ScoreScale(PatN ? float{1} / (PerfectBonus * PatN) : 0), WordN(0) {
80 std::copy(Pattern.begin(), Pattern.begin() + PatN, Pat);
81 for (
int I = 0; I < PatN; ++I)
82 LowPat[I] =
lower(Pat[I]);
83 Scores[0][0][Miss] = {0, Miss};
85 for (
int P = 0; P <= PatN; ++P)
86 for (
int W = 0; W < P; ++W)
87 for (Action A : {Miss, Match})
90 llvm::makeMutableArrayRef(PatRole, PatN));
94 if (!(WordContainsPattern = init(Word)))
99 auto Best = std::max(Scores[PatN][WordN][Miss].Score,
100 Scores[PatN][WordN][Match].Score);
104 ScoreScale * std::min(PerfectBonus * PatN, std::max<int>(0, Best));
116 0x00, 0x00, 0x00, 0x00,
117 0x00, 0x00, 0x00, 0x00,
118 0xff, 0xff, 0xff, 0xff,
119 0x55, 0x55, 0xf5, 0xff,
120 0xab, 0xaa, 0xaa, 0xaa,
121 0xaa, 0xaa, 0xea, 0xff,
122 0x57, 0x55, 0x55, 0x55,
123 0x55, 0x55, 0xd5, 0x3f,
124 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
125 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
126 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
127 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
145 0x00, 0xaa, 0xaa, 0xff,
146 0x00, 0x55, 0xaa, 0xff,
147 0x00, 0x55, 0x59, 0xff,
148 0x00, 0xaa, 0xaa, 0xff,
// Reads the I-th 2-bit field out of the packed table Data and returns it
// converted to T.
// Four fields share each byte: I >> 2 picks the byte, and (I & 3) * 2 is the
// bit offset of the field inside that byte (field 0 occupies the lowest bits).
// The final "& 3" keeps only the two bits of the selected field.
152 template <
typename T>
static T
packedLookup(
const uint8_t *Data,
int I) {
153 return static_cast<T
>((Data[I >> 2] >> ((I & 3) * 2)) & 3);
156 llvm::MutableArrayRef<CharRole> Roles) {
157 assert(Text.size() == Roles.size());
158 if (Text.size() == 0)
166 auto Rotate = [&](
CharType T) { Types = ((Types << 2) | T) & 0x3f; };
167 for (
unsigned I = 0; I < Text.size() - 1; ++I) {
169 Type = packedLookup<CharType>(
CharTypes, Text[I + 1]);
170 TypeSet |= 1 << Type;
172 Roles[I] = packedLookup<CharRole>(
CharRoles, Types);
176 Roles[Text.size() - 1] = packedLookup<CharRole>(
CharRoles, Types);
// Loads NewWord into the matcher's word buffers.
// NOTE(review): several statements of this function are not visible in this
// view; the comments below describe only the statements that are.
182 bool FuzzyMatcher::init(llvm::StringRef NewWord) {
// Only the first MaxWord characters of NewWord are considered.
183 WordN = std::min<int>(MaxWord, NewWord.size());
186 std::copy(NewWord.begin(), NewWord.begin() + WordN, Word);
// LowWord holds the lower()-mapped copy of Word.
189 for (
int I = 0; I < WordN; ++I)
190 LowWord[I] =
lower(Word[I]);
// Walks the word (index W) while tracking a position P in the pattern; the
// loop ends once P reaches PatN. Characters are compared through their
// lowercase copies.
// NOTE(review): the statements that advance P or leave the loop early are
// elided here - presumably this is a subsequence check; confirm.
193 for (
int W = 0, P = 0; P != PatN; ++W) {
196 if (LowWord[W] == LowPat[P])
// WordRole receives one role entry per considered word character.
204 llvm::makeMutableArrayRef(WordRole, WordN));
// Fills the Scores table, indexed as Scores[pattern prefix length][word
// prefix length][last action].
// NOTE(review): several statements of this function are not visible in this
// view; the comments below describe only the statements that are.
217 void FuzzyMatcher::buildGraph() {
// Row 0: each further word character lowers the Miss score of the previous
// cell by skipPenalty(W, Miss).
218 for (
int W = 0; W < WordN; ++W) {
219 Scores[0][W + 1][Miss] = {Scores[0][W][Miss].Score - skipPenalty(W, Miss),
// Remaining rows. W starts at P, so cells with fewer word characters than
// pattern characters are not visited by this loop.
223 for (
int P = 0; P < PatN; ++P) {
224 for (
int W = P; W < WordN; ++W) {
// Score is the cell being computed; PreMiss is its left neighbour (same
// pattern prefix, one word character fewer).
225 auto &Score = Scores[P + 1][W + 1], &PreMiss = Scores[P + 1][W];
227 auto MatchMissScore = PreMiss[Match].Score;
228 auto MissMissScore = PreMiss[Miss].Score;
// Skipping word character W costs a penalty that depends on the previous action.
230 MatchMissScore -= skipPenalty(W, Match);
231 MissMissScore -= skipPenalty(W, Miss);
// Keep the better predecessor for the Miss state; a tie picks Miss.
233 Score[Miss] = (MatchMissScore > MissMissScore)
234 ? ScoreInfo{MatchMissScore, Match}
235 : ScoreInfo{MissMissScore, Miss};
// PreMatch is the diagonal neighbour (one pattern and one word character fewer).
237 auto &PreMatch = Scores[P][W];
// A match is scored only when allowMatch permits it; the alternative branch
// of each conditional is elided in this view.
238 auto MatchMatchScore =
239 allowMatch(P, W, Match)
240 ? PreMatch[Match].Score + matchBonus(P, W, Match)
242 auto MissMatchScore = allowMatch(P, W, Miss)
243 ? PreMatch[Miss].Score + matchBonus(P, W, Miss)
// Keep the better predecessor for the Match state; a tie picks Miss.
245 Score[Match] = (MatchMatchScore > MissMatchScore)
246 ? ScoreInfo{MatchMatchScore, Match}
247 : ScoreInfo{MissMatchScore, Miss};
252 bool FuzzyMatcher::allowMatch(
int P,
int W, Action Last)
const {
253 if (LowPat[P] != LowWord[W])
263 if (WordRole[W] ==
Tail &&
264 (Word[W] == LowWord[W] || !(WordTypeSet & 1 <<
Lower)))
270 int FuzzyMatcher::skipPenalty(
int W, Action Last)
const {
272 if (WordRole[W] ==
Head)
279 int FuzzyMatcher::matchBonus(
int P,
int W, Action Last)
const {
280 assert(LowPat[P] == LowWord[W]);
286 if ((Pat[P] == Word[W] && ((PatTypeSet & 1 <<
Upper) || P == W)) ||
287 (PatRole[P] ==
Head && WordRole[W] ==
Head))
290 if (WordRole[W] ==
Tail && P && Last == Miss)
293 if (PatRole[P] ==
Head && WordRole[W] ==
Tail)
296 if (P == 0 && WordRole[W] ==
Tail)
298 assert(S <= PerfectBonus);
303 llvm::SmallString<256>
Result;
304 OS <<
"=== Match \"" << llvm::StringRef(Word, WordN) <<
"\" against [" 305 << llvm::StringRef(Pat, PatN) <<
"] ===\n";
307 OS <<
"Pattern is empty: perfect match.\n";
308 return Result = llvm::StringRef(Word, WordN);
311 OS <<
"Word is empty: no match.\n";
314 if (!WordContainsPattern) {
315 OS <<
"Substring check failed.\n";
317 }
else if (
isAwful(std::max(Scores[PatN][WordN][Match].Score,
318 Scores[PatN][WordN][Miss].Score))) {
319 OS <<
"Substring check passed, but all matches are forbidden\n";
321 if (!(PatTypeSet & 1 <<
Upper))
322 OS <<
"Lowercase query, so scoring ignores case\n";
328 (Scores[PatN][WordN][Match].Score > Scores[PatN][WordN][Miss].Score)
333 for (
int W = WordN - 1, P = PatN - 1; W >= 0; --W) {
335 const auto &Cell = Scores[P + 1][W + 1][Last];
338 const auto &Prev = Scores[P + 1][W][Cell.Prev];
339 S[W] = Cell.Score - Prev.Score;
342 for (
int I = 0; I < WordN; ++I) {
343 if (A[I] == Match && (I == 0 || A[I - 1] == Miss))
344 Result.push_back(
'[');
345 if (A[I] == Miss && I > 0 && A[I - 1] == Match)
346 Result.push_back(
']');
347 Result.push_back(Word[I]);
349 if (A[WordN - 1] == Match)
350 Result.push_back(
']');
352 for (
char C : llvm::StringRef(Word, WordN))
353 OS <<
" " << C <<
" ";
355 for (
int I = 0, J = 0; I < WordN; I++)
356 OS <<
" " << (A[I] == Match ? Pat[J++] :
' ') <<
" ";
358 for (
int I = 0; I < WordN; I++)
359 OS << llvm::format(
"%2d ", S[I]);
362 OS <<
"\nSegmentation:";
363 OS <<
"\n'" << llvm::StringRef(Word, WordN) <<
"'\n ";
364 for (
int I = 0; I < WordN; ++I)
365 OS <<
"?-+ "[static_cast<int>(WordRole[I])];
366 OS <<
"\n[" << llvm::StringRef(Pat, PatN) <<
"]\n ";
367 for (
int I = 0; I < PatN; ++I)
368 OS <<
"?-+ "[static_cast<int>(PatRole[I])];
371 OS <<
"\nScoring table (last-Miss, last-Match):\n";
373 for (
char C : llvm::StringRef(Word, WordN))
374 OS <<
" " << C <<
" ";
376 OS <<
"-+----" << std::string(WordN * 4,
'-') <<
"\n";
377 for (
int I = 0; I <= PatN; ++I) {
378 for (Action A : {Miss, Match}) {
379 OS << ((I && A == Miss) ? Pat[I - 1] :
' ') <<
"|";
380 for (
int J = 0; J <= WordN; ++J) {
381 if (!
isAwful(Scores[I][J][A].Score))
382 OS << llvm::format(
"%3d%c", Scores[I][J][A].Score,
383 Scores[I][J][A].Prev == Match ?
'*' :
' ');
static constexpr uint8_t CharTypes[]
static constexpr uint8_t CharRoles[]
Documents should not be synced at all.
unsigned char CharTypeSet
llvm::Optional< float > match(llvm::StringRef Word)
static bool isAwful(int S)
CharTypeSet calculateRoles(llvm::StringRef Text, llvm::MutableArrayRef< CharRole > Roles)
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
FuzzyMatcher(llvm::StringRef Pattern)
llvm::SmallString< 256 > dumpLast(llvm::raw_ostream &) const
static char lower(char C)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
static constexpr int PerfectBonus
static constexpr int AwfulScore
static T packedLookup(const uint8_t *Data, int I)