Author: Henry Jiang
Date: 2026-04-06T12:10:26-07:00
New Revision: 412d6941e356f8fa01c8d5fca396881290e16436

URL: 
https://github.com/llvm/llvm-project/commit/412d6941e356f8fa01c8d5fca396881290e16436
DIFF: 
https://github.com/llvm/llvm-project/commit/412d6941e356f8fa01c8d5fca396881290e16436.diff

LOG: [VFS] Guard against null key/value nodes when parsing YAML overlay 
(#190506)

When a VFS overlay YAML file contains malformed content such as tabs,
the YAML parser can produce KeyValueNode entries where `getKey` returns
nullptr. The VFS overlay parser then passes the nullptr to
`parseScalarString`, which then calls dyn_cast.

Switch to `dyn_cast_if_present` for the above callsites and a few more.

Added: 
    clang/test/VFS/Inputs/invalid-key.yaml
    clang/test/VFS/Inputs/invalid-top-level-key.yaml

Modified: 
    clang/lib/CrossTU/CrossTranslationUnit.cpp
    clang/lib/Tooling/JSONCompilationDatabase.cpp
    clang/test/VFS/parse-errors.c
    llvm/lib/Remarks/YAMLRemarkParser.cpp
    llvm/lib/Support/VirtualFileSystem.cpp
    llvm/lib/Support/YAMLTraits.cpp
    llvm/lib/Transforms/Utils/SymbolRewriter.cpp
    llvm/tools/sancov/sancov.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CrossTU/CrossTranslationUnit.cpp 
b/clang/lib/CrossTU/CrossTranslationUnit.cpp
index a911fa4e1813f..248c6320bc61b 100644
--- a/clang/lib/CrossTU/CrossTranslationUnit.cpp
+++ b/clang/lib/CrossTU/CrossTranslationUnit.cpp
@@ -773,7 +773,8 @@ parseInvocationList(StringRef FileContent, 
llvm::sys::path::Style PathStyle,
 
   for (auto &NextMapping : *Mappings) {
     /// The keys should be strings, which represent a source-file path.
-    auto *Key = dyn_cast<llvm::yaml::ScalarNode>(NextMapping.getKey());
+    auto *Key =
+        dyn_cast_if_present<llvm::yaml::ScalarNode>(NextMapping.getKey());
     if (!Key)
       return WrongFormatError(NextMapping.getKey());
 
@@ -792,7 +793,8 @@ parseInvocationList(StringRef FileContent, 
llvm::sys::path::Style PathStyle,
 
     /// The values should be sequences of strings, each representing a part of
     /// the invocation.
-    auto *Args = dyn_cast<llvm::yaml::SequenceNode>(NextMapping.getValue());
+    auto *Args =
+        dyn_cast_if_present<llvm::yaml::SequenceNode>(NextMapping.getValue());
     if (!Args)
       return WrongFormatError(NextMapping.getValue());
 

diff  --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp 
b/clang/lib/Tooling/JSONCompilationDatabase.cpp
index 31d39076749f8..0efa75970d986 100644
--- a/clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -347,7 +347,8 @@ bool JSONCompilationDatabase::parse(std::string 
&ErrorMessage) {
     llvm::yaml::ScalarNode *File = nullptr;
     llvm::yaml::ScalarNode *Output = nullptr;
     for (auto& NextKeyValue : *Object) {
-      auto *KeyString = 
dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
+      auto *KeyString =
+          dyn_cast_if_present<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
       if (!KeyString) {
         ErrorMessage = "Expected strings as key.";
         return false;

diff  --git a/clang/test/VFS/Inputs/invalid-key.yaml 
b/clang/test/VFS/Inputs/invalid-key.yaml
new file mode 100644
index 0000000000000..3d873d1a163b2
--- /dev/null
+++ b/clang/test/VFS/Inputs/invalid-key.yaml
@@ -0,0 +1,9 @@
+# NOTE: This file contains an intentional tab character that triggers a YAML
+# parse failure. Do not replace tabs with spaces.
+version: 0
+redirecting-with: fallthrough
+roots:
+  - type: directory-remap
+       name: test
+    external-contents: test
+

diff  --git a/clang/test/VFS/Inputs/invalid-top-level-key.yaml 
b/clang/test/VFS/Inputs/invalid-top-level-key.yaml
new file mode 100644
index 0000000000000..dd2071a9f6739
--- /dev/null
+++ b/clang/test/VFS/Inputs/invalid-top-level-key.yaml
@@ -0,0 +1,5 @@
+# NOTE: This file contains an intentional tab character that triggers a YAML
+# parse failure. Do not replace tabs with spaces.
+version: 0
+       redirecting-with: fallthrough
+roots: []

diff  --git a/clang/test/VFS/parse-errors.c b/clang/test/VFS/parse-errors.c
index 8aa208438bcca..88f59a4c61067 100644
--- a/clang/test/VFS/parse-errors.c
+++ b/clang/test/VFS/parse-errors.c
@@ -20,3 +20,9 @@
 // RUN: not %clang_cc1 -ivfsoverlay %S/Inputs/redirect-and-fallthrough.yaml 
-fsyntax-only %s 2>&1 | FileCheck -check-prefix=CHECK-EXCLUSIVE-KEYS %s
 // CHECK-EXCLUSIVE-KEYS: 'fallthrough' and 'redirecting-with' are mutually 
exclusive
 // CHECK-EXCLUSIVE-KEYS: invalid virtual filesystem overlay file
+
+// RUN: not %clang_cc1 -ivfsoverlay %S/Inputs/invalid-key.yaml -fsyntax-only 
%s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-KEY %s
+// CHECK-INVALID-KEY: invalid virtual filesystem overlay file
+
+// RUN: not %clang_cc1 -ivfsoverlay %S/Inputs/invalid-top-level-key.yaml 
-fsyntax-only %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-TOP %s
+// CHECK-INVALID-TOP: invalid virtual filesystem overlay file

diff  --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp 
b/llvm/lib/Remarks/YAMLRemarkParser.cpp
index a8b1c0cc29ad6..33f659eaaa49b 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp
@@ -218,7 +218,8 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
       else
         return MaybeLoc.takeError();
     } else if (KeyName == "Args") {
-      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+      auto *Args =
+          dyn_cast_if_present<yaml::SequenceNode>(RemarkField.getValue());
       if (!Args)
         return error("wrong value type for key.", RemarkField);
 
@@ -257,19 +258,19 @@ Expected<Type> 
YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
 }
 
 Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
-  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
+  if (auto *Key = dyn_cast_if_present<yaml::ScalarNode>(Node.getKey()))
     return Key->getRawValue();
 
   return error("key is not a string.", Node);
 }
 
 Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
-  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+  auto *Value = dyn_cast_if_present<yaml::ScalarNode>(Node.getValue());
   yaml::BlockScalarNode *ValueBlock;
   StringRef Result;
   if (!Value) {
     // Try to parse the value as a block node.
-    ValueBlock = dyn_cast<yaml::BlockScalarNode>(Node.getValue());
+    ValueBlock = dyn_cast_if_present<yaml::BlockScalarNode>(Node.getValue());
     if (!ValueBlock)
       return error("expected a value of scalar type.", Node);
     Result = ValueBlock->getValue();
@@ -284,7 +285,7 @@ Expected<StringRef> 
YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
 
 Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
   SmallVector<char, 4> Tmp;
-  auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+  auto *Value = dyn_cast_if_present<yaml::ScalarNode>(Node.getValue());
   if (!Value)
     return error("expected a value of scalar type.", Node);
   unsigned UnsignedValue = 0;
@@ -295,7 +296,7 @@ Expected<unsigned> 
YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
 
 Expected<RemarkLocation>
 YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
-  auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
+  auto *DebugLoc = dyn_cast_if_present<yaml::MappingNode>(Node.getValue());
   if (!DebugLoc)
     return error("expected a value of mapping type.", Node);
 

diff  --git a/llvm/lib/Support/VirtualFileSystem.cpp 
b/llvm/lib/Support/VirtualFileSystem.cpp
index 8a7ca35a8dd25..69f3bb8582b87 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -1668,7 +1668,7 @@ class llvm::vfs::RedirectingFileSystemParser {
   // false on error
   bool parseScalarString(yaml::Node *N, StringRef &Result,
                          SmallVectorImpl<char> &Storage) {
-    const auto *S = dyn_cast<yaml::ScalarNode>(N);
+    const auto *S = dyn_cast_if_present<yaml::ScalarNode>(N);
 
     if (!S) {
       error(N, "expected string");
@@ -1913,7 +1913,7 @@ class llvm::vfs::RedirectingFileSystemParser {
           return nullptr;
         }
         ContentsField = CF_List;
-        auto *Contents = dyn_cast<yaml::SequenceNode>(I.getValue());
+        auto *Contents = dyn_cast_if_present<yaml::SequenceNode>(I.getValue());
         if (!Contents) {
           // FIXME: this is only for directories, what about files?
           error(I.getValue(), "expected array");
@@ -2115,7 +2115,7 @@ class llvm::vfs::RedirectingFileSystemParser {
         return false;
 
       if (Key == "roots") {
-        auto *Roots = dyn_cast<yaml::SequenceNode>(I.getValue());
+        auto *Roots = dyn_cast_if_present<yaml::SequenceNode>(I.getValue());
         if (!Roots) {
           error(I.getValue(), "expected array");
           return false;

diff  --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 95a41eafdf5e4..d24e95e1e7487 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -430,7 +430,7 @@ Input::HNode *Input::createHNodes(Node *N) {
     auto mapHNode = new (MapHNodeAllocator.Allocate()) MapHNode(N);
     for (KeyValueNode &KVN : *Map) {
       Node *KeyNode = KVN.getKey();
-      ScalarNode *Key = dyn_cast_or_null<ScalarNode>(KeyNode);
+      ScalarNode *Key = dyn_cast_if_present<ScalarNode>(KeyNode);
       Node *Value = KVN.getValue();
       if (!Key || !Value) {
         if (!Key)

diff  --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp 
b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 6319fd524ff0f..4f361c3eb6720 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -295,13 +295,13 @@ bool RewriteMapParser::parseEntry(yaml::Stream &YS, 
yaml::KeyValueNode &Entry,
   SmallString<32> KeyStorage;
   StringRef RewriteType;
 
-  Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
+  Key = dyn_cast_if_present<yaml::ScalarNode>(Entry.getKey());
   if (!Key) {
     YS.printError(Entry.getKey(), "rewrite type must be a scalar");
     return false;
   }
 
-  Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
+  Value = dyn_cast_if_present<yaml::MappingNode>(Entry.getValue());
   if (!Value) {
     YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
     return false;
@@ -335,13 +335,13 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, 
yaml::ScalarNode *K,
     SmallString<32> ValueStorage;
     StringRef KeyValue;
 
-    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+    Key = dyn_cast_if_present<yaml::ScalarNode>(Field.getKey());
     if (!Key) {
       YS.printError(Field.getKey(), "descriptor key must be a scalar");
       return false;
     }
 
-    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+    Value = dyn_cast_if_present<yaml::ScalarNode>(Field.getValue());
     if (!Value) {
       YS.printError(Field.getValue(), "descriptor value must be a scalar");
       return false;
@@ -408,13 +408,13 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, 
yaml::ScalarNode *K,
     SmallString<32> ValueStorage;
     StringRef KeyValue;
 
-    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+    Key = dyn_cast_if_present<yaml::ScalarNode>(Field.getKey());
     if (!Key) {
       YS.printError(Field.getKey(), "descriptor Key must be a scalar");
       return false;
     }
 
-    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+    Value = dyn_cast_if_present<yaml::ScalarNode>(Field.getValue());
     if (!Value) {
       YS.printError(Field.getValue(), "descriptor value must be a scalar");
       return false;
@@ -475,13 +475,13 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, 
yaml::ScalarNode *K,
     SmallString<32> ValueStorage;
     StringRef KeyValue;
 
-    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
+    Key = dyn_cast_if_present<yaml::ScalarNode>(Field.getKey());
     if (!Key) {
       YS.printError(Field.getKey(), "descriptor key must be a scalar");
       return false;
     }
 
-    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
+    Value = dyn_cast_if_present<yaml::ScalarNode>(Field.getValue());
     if (!Value) {
       YS.printError(Field.getValue(), "descriptor value must be a scalar");
       return false;

diff  --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index 931f87731681a..45caa5f8c9933 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -390,7 +390,7 @@ static void operator<<(json::OStream &W, const 
SymbolizedCoverage &C) {
 
 static std::string parseScalarString(yaml::Node *N) {
   SmallString<64> StringStorage;
-  yaml::ScalarNode *S = dyn_cast<yaml::ScalarNode>(N);
+  yaml::ScalarNode *S = dyn_cast_if_present<yaml::ScalarNode>(N);
   failIf(!S, "expected string");
   return std::string(S->getValue(StringStorage));
 }
@@ -419,7 +419,7 @@ SymbolizedCoverage::read(const std::string &InputFile) {
 
     if (Key == "covered-points") {
       yaml::SequenceNode *Points =
-          dyn_cast<yaml::SequenceNode>(KVNode.getValue());
+          dyn_cast_if_present<yaml::SequenceNode>(KVNode.getValue());
       failIf(!Points, "expected array: " + InputFile);
 
       for (auto I = Points->begin(), E = Points->end(); I != E; ++I) {
@@ -429,21 +429,21 @@ SymbolizedCoverage::read(const std::string &InputFile) {
       Coverage->BinaryHash = parseScalarString(KVNode.getValue());
     } else if (Key == "point-symbol-info") {
       yaml::MappingNode *PointSymbolInfo =
-          dyn_cast<yaml::MappingNode>(KVNode.getValue());
+          dyn_cast_if_present<yaml::MappingNode>(KVNode.getValue());
       failIf(!PointSymbolInfo, "expected mapping node: " + InputFile);
 
       for (auto &FileKVNode : *PointSymbolInfo) {
         auto Filename = parseScalarString(FileKVNode.getKey());
 
         yaml::MappingNode *FileInfo =
-            dyn_cast<yaml::MappingNode>(FileKVNode.getValue());
+            dyn_cast_if_present<yaml::MappingNode>(FileKVNode.getValue());
         failIf(!FileInfo, "expected mapping node: " + InputFile);
 
         for (auto &FunctionKVNode : *FileInfo) {
           auto FunctionName = parseScalarString(FunctionKVNode.getKey());
 
           yaml::MappingNode *FunctionInfo =
-              dyn_cast<yaml::MappingNode>(FunctionKVNode.getValue());
+              
dyn_cast_if_present<yaml::MappingNode>(FunctionKVNode.getValue());
           failIf(!FunctionInfo, "expected mapping node: " + InputFile);
 
           for (auto &PointKVNode : *FunctionInfo) {


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to