[clang] [clang][ssaf] Fix normalization of TUSummary JSON representation and strengthen round-trip tests (PR #183241)

Aviral Goel via cfe-commits Wed, 25 Feb 2026 08:28:57 -0800

================
@@ -200,63 +260,205 @@ class JSONFormatTUSummaryTest : public JSONFormatTest {
     return JSONFormat().writeTUSummary(Summary, FilePath);
   }
 
-  // Normalize TUSummary JSON by sorting id_table by id field.
-  static Expected<json::Value> normalizeTUSummaryJSON(json::Value Val) {
-    auto *Obj = Val.getAsObject();
-    if (!Obj) {
-      return createStringError(
-          inconvertibleErrorCode(),
-          "Cannot normalize TUSummary JSON: expected an object");
-    }
+  static llvm::Error normalizeIDTable(json::Array &IDTable) {
+    for (const auto &[Index, Entry] : llvm::enumerate(IDTable)) {
+      const auto *EntryObj = Entry.getAsObject();
+      if (!EntryObj) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: id_table entry at index %zu "
+            "is not an object",
+            Index);
+      }
 
-    auto *IDTable = Obj->getArray("id_table");
-    if (!IDTable) {
-      return createStringError(inconvertibleErrorCode(),
-                               "Cannot normalize TUSummary JSON: 'id_table' "
-                               "field is either missing or has the wrong 
type");
+      const auto *IDValue = EntryObj->get("id");
+      if (!IDValue) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: id_table entry at index %zu "
+            "does not contain an 'id' field",
+            Index);
+      }
+
+      if (!IDValue->getAsUINT64()) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: id_table entry at index %zu "
+            "does not contain a valid 'id' uint64_t field",
+            Index);
+      }
     }
 
-    // Validate all id_table entries before sorting.
-    for (const auto &[Index, Entry] : llvm::enumerate(*IDTable)) {
+    // Safe to dereference: all entries were validated above.
+    llvm::sort(IDTable, [](const json::Value &A, const json::Value &B) {
+      return *A.getAsObject()->get("id")->getAsUINT64() <
+             *B.getAsObject()->get("id")->getAsUINT64();
+    });
+
+    return llvm::Error::success();
+  }
+
+  static llvm::Error normalizeLinkageTable(json::Array &LinkageTable) {
+    for (const auto &[Index, Entry] : llvm::enumerate(LinkageTable)) {
       const auto *EntryObj = Entry.getAsObject();
       if (!EntryObj) {
         return createStringError(
             inconvertibleErrorCode(),
-            "Cannot normalize TUSummary JSON: id_table entry at index %zu is "
-            "not an object",
+            "Cannot normalize TUSummary JSON: linkage_table entry at index "
+            "%zu is not an object",
             Index);
       }
 
       const auto *IDValue = EntryObj->get("id");
       if (!IDValue) {
         return createStringError(
             inconvertibleErrorCode(),
-            "Cannot normalize TUSummary JSON: id_table entry at index %zu does 
"
-            "not contain an 'id' field",
+            "Cannot normalize TUSummary JSON: linkage_table entry at index "
+            "%zu does not contain an 'id' field",
             Index);
       }
 
-      auto EntryID = IDValue->getAsUINT64();
-      if (!EntryID) {
+      if (!IDValue->getAsUINT64()) {
         return createStringError(
             inconvertibleErrorCode(),
-            "Cannot normalize TUSummary JSON: id_table entry at index %zu does 
"
-            "not contain a valid 'id' uint64_t field",
+            "Cannot normalize TUSummary JSON: linkage_table entry at index "
+            "%zu does not contain a valid 'id' uint64_t field",
             Index);
       }
     }
 
-    // Sort id_table entries by the "id" field to ensure deterministic ordering
-    // for comparison. Use projection-based comparison for 
strict-weak-ordering.
-    llvm::sort(*IDTable, [](const json::Value &A, const json::Value &B) {
-      // Safe to assume these succeed because we validated above.
-      const auto *AObj = A.getAsObject();
-      const auto *BObj = B.getAsObject();
-      uint64_t AID = *AObj->get("id")->getAsUINT64();
-      uint64_t BID = *BObj->get("id")->getAsUINT64();
-      return AID < BID;
+    // Safe to dereference: all entries were validated above.
+    llvm::sort(LinkageTable, [](const json::Value &A, const json::Value &B) {
+      return *A.getAsObject()->get("id")->getAsUINT64() <
+             *B.getAsObject()->get("id")->getAsUINT64();
     });
 
+    return llvm::Error::success();
+  }
+
+  static llvm::Error normalizeSummaryData(json::Array &SummaryData,
+                                          size_t DataIndex) {
+    for (const auto &[SummaryIndex, SummaryEntry] :
+         llvm::enumerate(SummaryData)) {
+      const auto *SummaryEntryObj = SummaryEntry.getAsObject();
+      if (!SummaryEntryObj) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: data entry at index %zu, "
+            "summary_data entry at index %zu is not an object",
+            DataIndex, SummaryIndex);
+      }
+
+      const auto *EntityIDValue = SummaryEntryObj->get("entity_id");
+      if (!EntityIDValue) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: data entry at index %zu, "
+            "summary_data entry at index %zu does not contain an "
+            "'entity_id' field",
+            DataIndex, SummaryIndex);
+      }
+
+      if (!EntityIDValue->getAsUINT64()) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: data entry at index %zu, "
+            "summary_data entry at index %zu does not contain a valid "
+            "'entity_id' uint64_t field",
+            DataIndex, SummaryIndex);
+      }
+    }
+
+    // Safe to dereference: all entries were validated above.
+    llvm::sort(SummaryData, [](const json::Value &A, const json::Value &B) {
+      return *A.getAsObject()->get("entity_id")->getAsUINT64() <
+             *B.getAsObject()->get("entity_id")->getAsUINT64();
+    });
+
+    return llvm::Error::success();
+  }
+
+  static llvm::Error normalizeData(json::Array &Data) {
+    for (const auto &[DataIndex, DataEntry] : llvm::enumerate(Data)) {
+      auto *DataEntryObj = DataEntry.getAsObject();
+      if (!DataEntryObj) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: data entry at index %zu "
+            "is not an object",
+            DataIndex);
+      }
+
+      if (!DataEntryObj->getString("summary_name")) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: data entry at index %zu "
+            "does not contain a 'summary_name' string field",
+            DataIndex);
+      }
+
+      auto *SummaryData = DataEntryObj->getArray("summary_data");
+      if (!SummaryData) {
+        return createStringError(
+            inconvertibleErrorCode(),
+            "Cannot normalize TUSummary JSON: data entry at index %zu "
+            "does not contain a 'summary_data' array field",
+            DataIndex);
+      }
+
+      if (auto Err = normalizeSummaryData(*SummaryData, DataIndex)) {
+        return Err;
+      }
+    }
+
+    // Safe to dereference: all entries were validated above.
+    llvm::sort(Data, [](const json::Value &A, const json::Value &B) {
+      return *A.getAsObject()->getString("summary_name") <
+             *B.getAsObject()->getString("summary_name");
+    });
+
+    return llvm::Error::success();
+  }
+
+  static Expected<json::Value> normalizeTUSummaryJSON(json::Value Val) {
+    auto *Obj = Val.getAsObject();
+    if (!Obj) {
+      return createStringError(
+          inconvertibleErrorCode(),
+          "Cannot normalize TUSummary JSON: expected an object");
+    }
+
+    auto *IDTable = Obj->getArray("id_table");
+    if (!IDTable) {
+      return createStringError(inconvertibleErrorCode(),
+                               "Cannot normalize TUSummary JSON: 'id_table' "
+                               "field is either missing or has the wrong 
type");
+    }
+    if (auto Err = normalizeIDTable(*IDTable)) {
----------------
aviralg wrote:


I fixed my PR description. It was not good and caused misinterpretation. I have 
two points to make:

1. Since we use `std::map`, we are guaranteed automatic sorting of entries as 
long as we use `JSONFormat` read/write methods. Of course, if we switch to 
something like `std::unordered_map` (or LLVM equivalents), we might lose this 
advantage. I don't know if we want to pay the `O(n lg n)` cost for sorting 
entries if we switch to these unordered containers.
2. The normalizer is needed for the input JSON object, supplied as a string in 
the round-trip tests, because that input uses arbitrary ordering for the 
entries in its various tables.

I want the normalizer in the tests because I want the tests to be resilient and 
easy to write. They should not fail because we ordered the entries differently 
in the input JSON, or because we switched to a different kind of map.

https://github.com/llvm/llvm-project/pull/183241
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang][ssaf] Fix normalization of TUSummary JSON representation and strengthen round-trip tests (PR #183241)

Reply via email to