Stephen Hines | c6ca60f | 2023-05-09 02:19:22 -0700 | [diff] [blame^] | 1 | //===- TypoCorrection.h - Class for typo correction results -----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file defines the TypoCorrection class, which stores the results of |
| 10 | // Sema's typo correction (Sema::CorrectTypo). |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_CLANG_SEMA_TYPOCORRECTION_H |
| 15 | #define LLVM_CLANG_SEMA_TYPOCORRECTION_H |
| 16 | |
| 17 | #include "clang/AST/Decl.h" |
| 18 | #include "clang/AST/DeclarationName.h" |
| 19 | #include "clang/Basic/LLVM.h" |
| 20 | #include "clang/Basic/PartialDiagnostic.h" |
| 21 | #include "clang/Basic/SourceLocation.h" |
| 22 | #include "clang/Sema/DeclSpec.h" |
| 23 | #include "llvm/ADT/ArrayRef.h" |
| 24 | #include "llvm/ADT/SmallVector.h" |
| 25 | #include "llvm/Support/Casting.h" |
| 26 | #include <cstddef> |
| 27 | #include <limits> |
| 28 | #include <string> |
| 29 | #include <utility> |
| 30 | #include <vector> |
| 31 | |
| 32 | namespace clang { |
| 33 | |
| 34 | class DeclContext; |
| 35 | class IdentifierInfo; |
| 36 | class LangOptions; |
| 37 | class MemberExpr; |
| 38 | class NestedNameSpecifier; |
| 39 | class Sema; |
| 40 | |
| 41 | /// Simple class containing the result of Sema::CorrectTypo |
| 42 | class TypoCorrection { |
| 43 | public: |
| 44 | // "Distance" for unusable corrections |
| 45 | static const unsigned InvalidDistance = std::numeric_limits<unsigned>::max(); |
| 46 | |
| 47 | // The largest distance still considered valid (larger edit distances are |
| 48 | // mapped to InvalidDistance by getEditDistance). |
| 49 | static const unsigned MaximumDistance = 10000U; |
| 50 | |
| 51 | // Relative weightings of the "edit distance" components. The higher the |
| 52 | // weight, the more of a penalty to fitness the component will give (higher |
| 53 | // weights mean greater contribution to the total edit distance, with the |
| 54 | // best correction candidates having the lowest edit distance). |
| 55 | static const unsigned CharDistanceWeight = 100U; |
| 56 | static const unsigned QualifierDistanceWeight = 110U; |
| 57 | static const unsigned CallbackDistanceWeight = 150U; |
| 58 | |
| 59 | TypoCorrection(const DeclarationName &Name, NamedDecl *NameDecl, |
| 60 | NestedNameSpecifier *NNS = nullptr, unsigned CharDistance = 0, |
| 61 | unsigned QualifierDistance = 0) |
| 62 | : CorrectionName(Name), CorrectionNameSpec(NNS), |
| 63 | CharDistance(CharDistance), QualifierDistance(QualifierDistance) { |
| 64 | if (NameDecl) |
| 65 | CorrectionDecls.push_back(NameDecl); |
| 66 | } |
| 67 | |
| 68 | TypoCorrection(NamedDecl *Name, NestedNameSpecifier *NNS = nullptr, |
| 69 | unsigned CharDistance = 0) |
| 70 | : CorrectionName(Name->getDeclName()), CorrectionNameSpec(NNS), |
| 71 | CharDistance(CharDistance) { |
| 72 | if (Name) |
| 73 | CorrectionDecls.push_back(Name); |
| 74 | } |
| 75 | |
| 76 | TypoCorrection(DeclarationName Name, NestedNameSpecifier *NNS = nullptr, |
| 77 | unsigned CharDistance = 0) |
| 78 | : CorrectionName(Name), CorrectionNameSpec(NNS), |
| 79 | CharDistance(CharDistance) {} |
| 80 | |
| 81 | TypoCorrection() = default; |
| 82 | |
| 83 | /// Gets the DeclarationName of the typo correction |
| 84 | DeclarationName getCorrection() const { return CorrectionName; } |
| 85 | |
| 86 | IdentifierInfo *getCorrectionAsIdentifierInfo() const { |
| 87 | return CorrectionName.getAsIdentifierInfo(); |
| 88 | } |
| 89 | |
| 90 | /// Gets the NestedNameSpecifier needed to use the typo correction |
| 91 | NestedNameSpecifier *getCorrectionSpecifier() const { |
| 92 | return CorrectionNameSpec; |
| 93 | } |
| 94 | |
| 95 | void setCorrectionSpecifier(NestedNameSpecifier *NNS) { |
| 96 | CorrectionNameSpec = NNS; |
| 97 | ForceSpecifierReplacement = (NNS != nullptr); |
| 98 | } |
| 99 | |
| 100 | void WillReplaceSpecifier(bool ForceReplacement) { |
| 101 | ForceSpecifierReplacement = ForceReplacement; |
| 102 | } |
| 103 | |
| 104 | bool WillReplaceSpecifier() const { |
| 105 | return ForceSpecifierReplacement; |
| 106 | } |
| 107 | |
| 108 | void setQualifierDistance(unsigned ED) { |
| 109 | QualifierDistance = ED; |
| 110 | } |
| 111 | |
| 112 | void setCallbackDistance(unsigned ED) { |
| 113 | CallbackDistance = ED; |
| 114 | } |
| 115 | |
| 116 | // Convert the given weighted edit distance to a roughly equivalent number of |
| 117 | // single-character edits (typically for comparison to the length of the |
| 118 | // string being edited). |
| 119 | static unsigned NormalizeEditDistance(unsigned ED) { |
| 120 | if (ED > MaximumDistance) |
| 121 | return InvalidDistance; |
| 122 | return (ED + CharDistanceWeight / 2) / CharDistanceWeight; |
| 123 | } |
| 124 | |
| 125 | /// Gets the "edit distance" of the typo correction from the typo. |
| 126 | /// If Normalized is true, scale the distance down by the CharDistanceWeight |
| 127 | /// to return the edit distance in terms of single-character edits. |
| 128 | unsigned getEditDistance(bool Normalized = true) const { |
| 129 | if (CharDistance > MaximumDistance || QualifierDistance > MaximumDistance || |
| 130 | CallbackDistance > MaximumDistance) |
| 131 | return InvalidDistance; |
| 132 | unsigned ED = |
| 133 | CharDistance * CharDistanceWeight + |
| 134 | QualifierDistance * QualifierDistanceWeight + |
| 135 | CallbackDistance * CallbackDistanceWeight; |
| 136 | if (ED > MaximumDistance) |
| 137 | return InvalidDistance; |
| 138 | // Half the CharDistanceWeight is added to ED to simulate rounding since |
| 139 | // integer division truncates the value (i.e. round-to-nearest-int instead |
| 140 | // of round-to-zero). |
| 141 | return Normalized ? NormalizeEditDistance(ED) : ED; |
| 142 | } |
| 143 | |
| 144 | /// Get the correction declaration found by name lookup (before we |
| 145 | /// looked through using shadow declarations and the like). |
| 146 | NamedDecl *getFoundDecl() const { |
| 147 | return hasCorrectionDecl() ? *(CorrectionDecls.begin()) : nullptr; |
| 148 | } |
| 149 | |
| 150 | /// Gets the pointer to the declaration of the typo correction |
| 151 | NamedDecl *getCorrectionDecl() const { |
| 152 | auto *D = getFoundDecl(); |
| 153 | return D ? D->getUnderlyingDecl() : nullptr; |
| 154 | } |
| 155 | template <class DeclClass> |
| 156 | DeclClass *getCorrectionDeclAs() const { |
| 157 | return dyn_cast_or_null<DeclClass>(getCorrectionDecl()); |
| 158 | } |
| 159 | |
| 160 | /// Clears the list of NamedDecls. |
| 161 | void ClearCorrectionDecls() { |
| 162 | CorrectionDecls.clear(); |
| 163 | } |
| 164 | |
| 165 | /// Clears the list of NamedDecls before adding the new one. |
| 166 | void setCorrectionDecl(NamedDecl *CDecl) { |
| 167 | CorrectionDecls.clear(); |
| 168 | addCorrectionDecl(CDecl); |
| 169 | } |
| 170 | |
| 171 | /// Clears the list of NamedDecls and adds the given set. |
| 172 | void setCorrectionDecls(ArrayRef<NamedDecl*> Decls) { |
| 173 | CorrectionDecls.clear(); |
| 174 | CorrectionDecls.insert(CorrectionDecls.begin(), Decls.begin(), Decls.end()); |
| 175 | } |
| 176 | |
| 177 | /// Add the given NamedDecl to the list of NamedDecls that are the |
| 178 | /// declarations associated with the DeclarationName of this TypoCorrection |
| 179 | void addCorrectionDecl(NamedDecl *CDecl); |
| 180 | |
| 181 | std::string getAsString(const LangOptions &LO) const; |
| 182 | |
| 183 | std::string getQuoted(const LangOptions &LO) const { |
| 184 | return "'" + getAsString(LO) + "'"; |
| 185 | } |
| 186 | |
| 187 | /// Returns whether this TypoCorrection has a non-empty DeclarationName |
| 188 | explicit operator bool() const { return bool(CorrectionName); } |
| 189 | |
| 190 | /// Mark this TypoCorrection as being a keyword. |
| 191 | /// Since addCorrectionDeclsand setCorrectionDecl don't allow NULL to be |
| 192 | /// added to the list of the correction's NamedDecl pointers, NULL is added |
| 193 | /// as the only element in the list to mark this TypoCorrection as a keyword. |
| 194 | void makeKeyword() { |
| 195 | CorrectionDecls.clear(); |
| 196 | CorrectionDecls.push_back(nullptr); |
| 197 | ForceSpecifierReplacement = true; |
| 198 | } |
| 199 | |
| 200 | // Check if this TypoCorrection is a keyword by checking if the first |
| 201 | // item in CorrectionDecls is NULL. |
| 202 | bool isKeyword() const { |
| 203 | return !CorrectionDecls.empty() && CorrectionDecls.front() == nullptr; |
| 204 | } |
| 205 | |
| 206 | // Check if this TypoCorrection is the given keyword. |
| 207 | template<std::size_t StrLen> |
| 208 | bool isKeyword(const char (&Str)[StrLen]) const { |
| 209 | return isKeyword() && getCorrectionAsIdentifierInfo()->isStr(Str); |
| 210 | } |
| 211 | |
| 212 | // Returns true if the correction either is a keyword or has a known decl. |
| 213 | bool isResolved() const { return !CorrectionDecls.empty(); } |
| 214 | |
| 215 | bool isOverloaded() const { |
| 216 | return CorrectionDecls.size() > 1; |
| 217 | } |
| 218 | |
| 219 | void setCorrectionRange(CXXScopeSpec *SS, |
| 220 | const DeclarationNameInfo &TypoName) { |
| 221 | CorrectionRange = TypoName.getSourceRange(); |
| 222 | if (ForceSpecifierReplacement && SS && !SS->isEmpty()) |
| 223 | CorrectionRange.setBegin(SS->getBeginLoc()); |
| 224 | } |
| 225 | |
| 226 | SourceRange getCorrectionRange() const { |
| 227 | return CorrectionRange; |
| 228 | } |
| 229 | |
| 230 | using decl_iterator = SmallVectorImpl<NamedDecl *>::iterator; |
| 231 | |
| 232 | decl_iterator begin() { |
| 233 | return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin(); |
| 234 | } |
| 235 | |
| 236 | decl_iterator end() { return CorrectionDecls.end(); } |
| 237 | |
| 238 | using const_decl_iterator = SmallVectorImpl<NamedDecl *>::const_iterator; |
| 239 | |
| 240 | const_decl_iterator begin() const { |
| 241 | return isKeyword() ? CorrectionDecls.end() : CorrectionDecls.begin(); |
| 242 | } |
| 243 | |
| 244 | const_decl_iterator end() const { return CorrectionDecls.end(); } |
| 245 | |
| 246 | /// Returns whether this typo correction is correcting to a |
| 247 | /// declaration that was declared in a module that has not been imported. |
| 248 | bool requiresImport() const { return RequiresImport; } |
| 249 | void setRequiresImport(bool Req) { RequiresImport = Req; } |
| 250 | |
| 251 | /// Extra diagnostics are printed after the first diagnostic for the typo. |
| 252 | /// This can be used to attach external notes to the diag. |
| 253 | void addExtraDiagnostic(PartialDiagnostic PD) { |
| 254 | ExtraDiagnostics.push_back(std::move(PD)); |
| 255 | } |
| 256 | ArrayRef<PartialDiagnostic> getExtraDiagnostics() const { |
| 257 | return ExtraDiagnostics; |
| 258 | } |
| 259 | |
| 260 | private: |
| 261 | bool hasCorrectionDecl() const { |
| 262 | return (!isKeyword() && !CorrectionDecls.empty()); |
| 263 | } |
| 264 | |
| 265 | // Results. |
| 266 | DeclarationName CorrectionName; |
| 267 | NestedNameSpecifier *CorrectionNameSpec = nullptr; |
| 268 | SmallVector<NamedDecl *, 1> CorrectionDecls; |
| 269 | unsigned CharDistance = 0; |
| 270 | unsigned QualifierDistance = 0; |
| 271 | unsigned CallbackDistance = 0; |
| 272 | SourceRange CorrectionRange; |
| 273 | bool ForceSpecifierReplacement = false; |
| 274 | bool RequiresImport = false; |
| 275 | |
| 276 | std::vector<PartialDiagnostic> ExtraDiagnostics; |
| 277 | }; |
| 278 | |
| 279 | /// Base class for callback objects used by Sema::CorrectTypo to check |
| 280 | /// the validity of a potential typo correction. |
| 281 | class CorrectionCandidateCallback { |
| 282 | public: |
| 283 | static const unsigned InvalidDistance = TypoCorrection::InvalidDistance; |
| 284 | |
| 285 | explicit CorrectionCandidateCallback(IdentifierInfo *Typo = nullptr, |
| 286 | NestedNameSpecifier *TypoNNS = nullptr) |
| 287 | : Typo(Typo), TypoNNS(TypoNNS) {} |
| 288 | |
| 289 | virtual ~CorrectionCandidateCallback() = default; |
| 290 | |
| 291 | /// Simple predicate used by the default RankCandidate to |
| 292 | /// determine whether to return an edit distance of 0 or InvalidDistance. |
| 293 | /// This can be overridden by validators that only need to determine if a |
| 294 | /// candidate is viable, without ranking potentially viable candidates. |
| 295 | /// Only ValidateCandidate or RankCandidate need to be overridden by a |
| 296 | /// callback wishing to check the viability of correction candidates. |
| 297 | /// The default predicate always returns true if the candidate is not a type |
| 298 | /// name or keyword, true for types if WantTypeSpecifiers is true, and true |
| 299 | /// for keywords if WantTypeSpecifiers, WantExpressionKeywords, |
| 300 | /// WantCXXNamedCasts, WantRemainingKeywords, or WantObjCSuper is true. |
| 301 | virtual bool ValidateCandidate(const TypoCorrection &candidate); |
| 302 | |
| 303 | /// Method used by Sema::CorrectTypo to assign an "edit distance" rank |
| 304 | /// to a candidate (where a lower value represents a better candidate), or |
| 305 | /// returning InvalidDistance if the candidate is not at all viable. For |
| 306 | /// validation callbacks that only need to determine if a candidate is viable, |
| 307 | /// the default RankCandidate returns either 0 or InvalidDistance depending |
| 308 | /// whether ValidateCandidate returns true or false. |
| 309 | virtual unsigned RankCandidate(const TypoCorrection &candidate) { |
| 310 | return (!MatchesTypo(candidate) && ValidateCandidate(candidate)) |
| 311 | ? 0 |
| 312 | : InvalidDistance; |
| 313 | } |
| 314 | |
| 315 | /// Clone this CorrectionCandidateCallback. CorrectionCandidateCallbacks are |
| 316 | /// initially stack-allocated. However in case where delayed typo-correction |
| 317 | /// is done we need to move the callback to storage with a longer lifetime. |
| 318 | /// Every class deriving from CorrectionCandidateCallback must implement |
| 319 | /// this method. |
| 320 | virtual std::unique_ptr<CorrectionCandidateCallback> clone() = 0; |
| 321 | |
| 322 | void setTypoName(IdentifierInfo *II) { Typo = II; } |
| 323 | void setTypoNNS(NestedNameSpecifier *NNS) { TypoNNS = NNS; } |
| 324 | |
| 325 | // Flags for context-dependent keywords. WantFunctionLikeCasts is only |
| 326 | // used/meaningful when WantCXXNamedCasts is false. |
| 327 | // TODO: Expand these to apply to non-keywords or possibly remove them. |
| 328 | bool WantTypeSpecifiers = true; |
| 329 | bool WantExpressionKeywords = true; |
| 330 | bool WantCXXNamedCasts = true; |
| 331 | bool WantFunctionLikeCasts = true; |
| 332 | bool WantRemainingKeywords = true; |
| 333 | bool WantObjCSuper = false; |
| 334 | // Temporary hack for the one case where a CorrectTypoContext enum is used |
| 335 | // when looking up results. |
| 336 | bool IsObjCIvarLookup = false; |
| 337 | bool IsAddressOfOperand = false; |
| 338 | |
| 339 | protected: |
| 340 | bool MatchesTypo(const TypoCorrection &candidate) { |
| 341 | return Typo && candidate.isResolved() && !candidate.requiresImport() && |
| 342 | candidate.getCorrectionAsIdentifierInfo() == Typo && |
| 343 | // FIXME: This probably does not return true when both |
| 344 | // NestedNameSpecifiers have the same textual representation. |
| 345 | candidate.getCorrectionSpecifier() == TypoNNS; |
| 346 | } |
| 347 | |
| 348 | IdentifierInfo *Typo; |
| 349 | NestedNameSpecifier *TypoNNS; |
| 350 | }; |
| 351 | |
| 352 | class DefaultFilterCCC final : public CorrectionCandidateCallback { |
| 353 | public: |
| 354 | explicit DefaultFilterCCC(IdentifierInfo *Typo = nullptr, |
| 355 | NestedNameSpecifier *TypoNNS = nullptr) |
| 356 | : CorrectionCandidateCallback(Typo, TypoNNS) {} |
| 357 | |
| 358 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
| 359 | return std::make_unique<DefaultFilterCCC>(*this); |
| 360 | } |
| 361 | }; |
| 362 | |
| 363 | /// Simple template class for restricting typo correction candidates |
| 364 | /// to ones having a single Decl* of the given type. |
| 365 | template <class C> |
| 366 | class DeclFilterCCC final : public CorrectionCandidateCallback { |
| 367 | public: |
| 368 | bool ValidateCandidate(const TypoCorrection &candidate) override { |
| 369 | return candidate.getCorrectionDeclAs<C>(); |
| 370 | } |
| 371 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
| 372 | return std::make_unique<DeclFilterCCC>(*this); |
| 373 | } |
| 374 | }; |
| 375 | |
| 376 | // Callback class to limit the allowed keywords and to only accept typo |
| 377 | // corrections that are keywords or whose decls refer to functions (or template |
| 378 | // functions) that accept the given number of arguments. |
| 379 | class FunctionCallFilterCCC : public CorrectionCandidateCallback { |
| 380 | public: |
| 381 | FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs, |
| 382 | bool HasExplicitTemplateArgs, |
| 383 | MemberExpr *ME = nullptr); |
| 384 | |
| 385 | bool ValidateCandidate(const TypoCorrection &candidate) override; |
| 386 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
| 387 | return std::make_unique<FunctionCallFilterCCC>(*this); |
| 388 | } |
| 389 | |
| 390 | private: |
| 391 | unsigned NumArgs; |
| 392 | bool HasExplicitTemplateArgs; |
| 393 | DeclContext *CurContext; |
| 394 | MemberExpr *MemberFn; |
| 395 | }; |
| 396 | |
| 397 | // Callback class that effectively disabled typo correction |
| 398 | class NoTypoCorrectionCCC final : public CorrectionCandidateCallback { |
| 399 | public: |
| 400 | NoTypoCorrectionCCC() { |
| 401 | WantTypeSpecifiers = false; |
| 402 | WantExpressionKeywords = false; |
| 403 | WantCXXNamedCasts = false; |
| 404 | WantFunctionLikeCasts = false; |
| 405 | WantRemainingKeywords = false; |
| 406 | } |
| 407 | |
| 408 | bool ValidateCandidate(const TypoCorrection &candidate) override { |
| 409 | return false; |
| 410 | } |
| 411 | std::unique_ptr<CorrectionCandidateCallback> clone() override { |
| 412 | return std::make_unique<NoTypoCorrectionCCC>(*this); |
| 413 | } |
| 414 | }; |
| 415 | |
| 416 | } // namespace clang |
| 417 | |
| 418 | #endif // LLVM_CLANG_SEMA_TYPOCORRECTION_H |