https://github.com/upsj created https://github.com/llvm/llvm-project/pull/177400
The current documentation leaves some questions unanswered to me, which I'm trying to clarify here. 1. It was unclear how SourceLocation differed when referring to the character level vs. the token level. Turns out there is no such difference, and SourceLocation always refers to characters. This should be made explicit in the docs. 2. It was unclear in which cases (Char)SourceRange is inclusive (containing the endpoint) or exclusive (ending before the endpoint). From my reading of the docs and investigating the behavior of different AST nodes' `getSourceLoc()` result and `Lexer::getSourceText()`, SourceRange is always inclusive and CharSourceRange is inclusive only as a TokenRange, and exclusive as a CharRange. This is also consistent matches with the documentation of the clang::transformer::after() function in RangeSelector.h, where the question of inclusive/exclusive ranges came up first for me. >From 9615e09e8a61a564c2fbaa31967f505590c6beaa Mon Sep 17 00:00:00 2001 From: Tobias Ribizel <[email protected]> Date: Thu, 22 Jan 2026 17:41:44 +0100 Subject: [PATCH 1/2] [clang] format SourceLocation.h --- clang/include/clang/Basic/SourceLocation.h | 133 ++++++++++----------- 1 file changed, 61 insertions(+), 72 deletions(-) diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index bd0038d5ae1ae..aab021ad7b610 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -104,7 +104,7 @@ class SourceLocation { enum : UIntTy { MacroIDBit = 1ULL << (8 * sizeof(UIntTy) - 1) }; public: - bool isFileID() const { return (ID & MacroIDBit) == 0; } + bool isFileID() const { return (ID & MacroIDBit) == 0; } bool isMacroID() const { return (ID & MacroIDBit) != 0; } /// Return true if this is a valid SourceLocation object. @@ -137,9 +137,9 @@ class SourceLocation { /// Return a source location with the specified offset from this /// SourceLocation. SourceLocation getLocWithOffset(IntTy Offset) const { - assert(((getOffset()+Offset) & MacroIDBit) == 0 && "offset overflow"); + assert(((getOffset() + Offset) & MacroIDBit) == 0 && "offset overflow"); SourceLocation L; - L.ID = ID+Offset; + L.ID = ID + Offset; return L; } @@ -165,10 +165,10 @@ class SourceLocation { /// /// This should only be passed to SourceLocation::getFromPtrEncoding, it /// should not be inspected directly. - void* getPtrEncoding() const { + void *getPtrEncoding() const { // Double cast to avoid a warning "cast to pointer from integer of different // size". - return (void*)(uintptr_t)getRawEncoding(); + return (void *)(uintptr_t)getRawEncoding(); } /// Turn a pointer encoding of a SourceLocation object back @@ -230,13 +230,9 @@ class SourceRange { bool isValid() const { return B.isValid() && E.isValid(); } bool isInvalid() const { return !isValid(); } - bool operator==(const SourceRange &X) const { - return B == X.B && E == X.E; - } + bool operator==(const SourceRange &X) const { return B == X.B && E == X.E; } - bool operator!=(const SourceRange &X) const { - return B != X.B || E != X.E; - } + bool operator!=(const SourceRange &X) const { return B != X.B || E != X.E; } // Returns true iff other is wholly contained within this range. bool fullyContains(const SourceRange &other) const { @@ -446,7 +442,7 @@ class FullSourceLoc : public SourceLocation { /// Comparison function class, useful for sorting FullSourceLocs. struct BeforeThanCompare { - bool operator()(const FullSourceLoc& lhs, const FullSourceLoc& rhs) const { + bool operator()(const FullSourceLoc &lhs, const FullSourceLoc &rhs) const { return lhs.isBeforeInTranslationUnitThan(rhs); } }; @@ -456,14 +452,12 @@ class FullSourceLoc : public SourceLocation { /// This is useful for debugging. void dump() const; - friend bool - operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { + friend bool operator==(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { return LHS.getRawEncoding() == RHS.getRawEncoding() && - LHS.SrcMgr == RHS.SrcMgr; + LHS.SrcMgr == RHS.SrcMgr; } - friend bool - operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { + friend bool operator!=(const FullSourceLoc &LHS, const FullSourceLoc &RHS) { return !(LHS == RHS); } }; @@ -472,73 +466,68 @@ class FullSourceLoc : public SourceLocation { namespace llvm { - /// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and - /// DenseSets. - template <> - struct DenseMapInfo<clang::FileID, void> { - static clang::FileID getEmptyKey() { - return {}; - } +/// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and +/// DenseSets. +template <> struct DenseMapInfo<clang::FileID, void> { + static clang::FileID getEmptyKey() { return {}; } - static clang::FileID getTombstoneKey() { - return clang::FileID::getSentinel(); - } + static clang::FileID getTombstoneKey() { + return clang::FileID::getSentinel(); + } - static unsigned getHashValue(clang::FileID S) { - return S.getHashValue(); - } + static unsigned getHashValue(clang::FileID S) { return S.getHashValue(); } - static bool isEqual(clang::FileID LHS, clang::FileID RHS) { - return LHS == RHS; - } - }; + static bool isEqual(clang::FileID LHS, clang::FileID RHS) { + return LHS == RHS; + } +}; - /// Define DenseMapInfo so that SourceLocation's can be used as keys in - /// DenseMap and DenseSet. This trait class is eqivalent to - /// DenseMapInfo<unsigned> which uses SourceLocation::ID is used as a key. - template <> struct DenseMapInfo<clang::SourceLocation, void> { - static clang::SourceLocation getEmptyKey() { - constexpr clang::SourceLocation::UIntTy Zero = 0; - return clang::SourceLocation::getFromRawEncoding(~Zero); - } +/// Define DenseMapInfo so that SourceLocation's can be used as keys in +/// DenseMap and DenseSet. This trait class is eqivalent to +/// DenseMapInfo<unsigned> which uses SourceLocation::ID is used as a key. +template <> struct DenseMapInfo<clang::SourceLocation, void> { + static clang::SourceLocation getEmptyKey() { + constexpr clang::SourceLocation::UIntTy Zero = 0; + return clang::SourceLocation::getFromRawEncoding(~Zero); + } - static clang::SourceLocation getTombstoneKey() { - constexpr clang::SourceLocation::UIntTy Zero = 0; - return clang::SourceLocation::getFromRawEncoding(~Zero - 1); - } + static clang::SourceLocation getTombstoneKey() { + constexpr clang::SourceLocation::UIntTy Zero = 0; + return clang::SourceLocation::getFromRawEncoding(~Zero - 1); + } - static unsigned getHashValue(clang::SourceLocation Loc) { - return Loc.getHashValue(); - } + static unsigned getHashValue(clang::SourceLocation Loc) { + return Loc.getHashValue(); + } - static bool isEqual(clang::SourceLocation LHS, clang::SourceLocation RHS) { - return LHS == RHS; - } - }; + static bool isEqual(clang::SourceLocation LHS, clang::SourceLocation RHS) { + return LHS == RHS; + } +}; - // Allow calling FoldingSetNodeID::Add with SourceLocation object as parameter - template <> struct FoldingSetTrait<clang::SourceLocation, void> { - static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID); - }; +// Allow calling FoldingSetNodeID::Add with SourceLocation object as parameter +template <> struct FoldingSetTrait<clang::SourceLocation, void> { + static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID); +}; - template <> struct DenseMapInfo<clang::SourceRange> { - static clang::SourceRange getEmptyKey() { - return DenseMapInfo<clang::SourceLocation>::getEmptyKey(); - } +template <> struct DenseMapInfo<clang::SourceRange> { + static clang::SourceRange getEmptyKey() { + return DenseMapInfo<clang::SourceLocation>::getEmptyKey(); + } - static clang::SourceRange getTombstoneKey() { - return DenseMapInfo<clang::SourceLocation>::getTombstoneKey(); - } + static clang::SourceRange getTombstoneKey() { + return DenseMapInfo<clang::SourceLocation>::getTombstoneKey(); + } - static unsigned getHashValue(clang::SourceRange Range) { - return detail::combineHashValue(Range.getBegin().getHashValue(), - Range.getEnd().getHashValue()); - } + static unsigned getHashValue(clang::SourceRange Range) { + return detail::combineHashValue(Range.getBegin().getHashValue(), + Range.getEnd().getHashValue()); + } - static bool isEqual(clang::SourceRange LHS, clang::SourceRange RHS) { - return LHS == RHS; - } - }; + static bool isEqual(clang::SourceRange LHS, clang::SourceRange RHS) { + return LHS == RHS; + } +}; } // namespace llvm >From 645ac26db1c0273b34901344d46ac552fa5e0cb8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel <[email protected]> Date: Thu, 22 Jan 2026 17:42:14 +0100 Subject: [PATCH 2/2] [clang] improve documentation for SourceLocation and (Char)SourceRange Clarify that SourceLocation always operates on a character level (even if referring to tokens) and SourceRange is inclusive, while CharSourceRange is exclusive usually. --- clang/include/clang/Basic/SourceLocation.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index aab021ad7b610..69fc69ebb946c 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -86,6 +86,10 @@ using FileIDAndOffset = std::pair<FileID, unsigned>; /// In addition, one bit of SourceLocation is used for quick access to the /// information whether the location is in a file or a macro expansion. /// +/// SourceLocation operates on a character level, i.e. offsets describe +/// character distances, but in most cases, they are used on a token level, +/// where a SourceLocation points to the first character of a lexer token. +/// /// It is important that this type remains small. It is currently 32 bits wide. class SourceLocation { friend class ASTReader; @@ -212,6 +216,11 @@ inline bool operator>=(const SourceLocation &LHS, const SourceLocation &RHS) { } /// A trivial tuple used to represent a source range. +/// +/// SourceRange is an inclusive range [begin, end] that contains its endpoints, +/// and when referring to tokens, its begin SourceLocation usually points to +/// the first character of the first token and its end SourceLocation points to +/// the last character of the last token. class SourceRange { SourceLocation B; SourceLocation E; @@ -251,6 +260,15 @@ class SourceRange { /// last token of the range (a "token range"). In the token range case, the /// size of the last token must be measured to determine the actual end of the /// range. +/// +/// CharSourceRange is interpreted differently depending on whether it is a +/// TokenRange or a CharRange. +/// For a TokenRange, the range contains the endpoint, i.e. the token containing +/// the end SourceLocation. +/// For a CharRange, the range doesn't contain the endpoint, i.e. it ends at the +/// character before the end SourceLocation. This allows representing a point +/// CharRange [begin, begin) that points at the empty range right in front of +/// the begin SourceLocation. class CharSourceRange { SourceRange Range; bool IsTokenRange = false; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
