https://github.com/hokein created 
https://github.com/llvm/llvm-project/pull/145529

The delta encoding can produce values up to 33 bits, but the current decoding 
logic only preserves the lower 32 bits, potentially causing data loss.

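This patch fixes the issue by reserving 33 bits for the encoded location: the
module file index is now stored above those bits, and decoding keeps the lower
33 bits instead of the lower 32.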

>From d0ddd1d3e3e64c45439509fcf40be1fda569d5c9 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein...@gmail.com>
Date: Tue, 24 Jun 2025 17:08:16 +0200
Subject: [PATCH] [Serialization] Fix source location data loss during
 decoding.

The delta encoding can produce values up to 33 bits, but the current decoding
logic only preserves the lower 32 bits, potentially causing data loss.

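This patch fixes the issue by reserving 33 bits for the encoded
location: the module file index is now stored above those bits, and
decoding keeps the lower 33 bits instead of the lower 32.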
---
 .../include/clang/Serialization/SourceLocationEncoding.h | 9 ++++++---
 .../Serialization/SourceLocationEncodingTest.cpp         | 2 ++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h
index 33ca1728fa479..85a7379a5fb55 100644
--- a/clang/include/clang/Serialization/SourceLocationEncoding.h
+++ b/clang/include/clang/Serialization/SourceLocationEncoding.h
@@ -48,6 +48,9 @@ class SourceLocationEncoding {
   using UIntTy = SourceLocation::UIntTy;
   constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
 
+  // The maximum number of bits we use for the encoding.
+  constexpr static unsigned EncodingBits = UIntBits + 1;
+
   static UIntTy encodeRaw(UIntTy Raw) {
     return (Raw << 1) | (Raw >> (UIntBits - 1));
   }
@@ -179,20 +182,20 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
 
   // 16 bits should be sufficient to store the module file index.
   assert(BaseModuleFileIndex < (1 << 16));
-  Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
+  Encoded |= (RawLocEncoding)BaseModuleFileIndex << EncodingBits;
   return Encoded;
 }
 inline std::pair<SourceLocation, unsigned>
 SourceLocationEncoding::decode(RawLocEncoding Encoded,
                                SourceLocationSequence *Seq) {
-  unsigned ModuleFileIndex = Encoded >> 32;
+  unsigned ModuleFileIndex = Encoded >> EncodingBits;
 
   if (!ModuleFileIndex)
     return {Seq ? Seq->decode(Encoded)
                 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
             ModuleFileIndex};
 
-  Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
+  Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(EncodingBits);
   SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
 
   return {Loc, ModuleFileIndex};
diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
index c80a8fd0e52b1..b21035af47cfc 100644
--- a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
+++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp
@@ -104,6 +104,8 @@ TEST(SourceLocationEncoding, Sequence) {
 
   roundTrip(
       {123 | MacroBit, 1, 9, Biggest, Big, Big + 1, 0, MacroBit | Big, 0});
+
+  roundTrip({1, (1u << 30) + 1});
 }
 
 } // namespace

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to