https://github.com/NagyDonat created 
https://github.com/llvm/llvm-project/pull/157846

Previously the checker `security.VAList` only tracked the set of the 
inintialized `va_list` objects; this commit replaces this with a mapping that 
can distinguish the "uninitialized" `va_list` objects from the "already 
released" ones. Moreover, a new "unknown" state is introduced to replace the 
slightly hacky solutions that checked the `Symbolic` nature of the `SVal` 
region.

In addition to sligthly improving the messages, this commit also prepares the 
ground for a follow-up change. I'm planning to introduce an "indeterminate" 
state (which needs `va_end` but cannot be otherwise used) to model the 
requirements of SEI CERT rule MSC39-C, which states:

> The va_list may be passed as an argument to another function, but
> calling va_arg() within that function causes the va_list to have an
> indeterminate value in the calling function. As a result, attempting
> to read variable arguments without reinitializing the va_list can have
> unexpected behavior.

From 73f621f1db437ba07e2de2deae9829ce524eb30f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.n...@ericsson.com>
Date: Wed, 10 Sep 2025 14:20:05 +0200
Subject: [PATCH] [analyzer] Improve messaging in security.VAList

Previously the checker `security.VAList` only tracked the set of the
inintialized `va_list` objects; this commit replaces this with a mapping
that can distinguish the "uninitialized" `va_list` objects from the
"already released" ones. Moreover, a new "unknown" state is introduced
to replace the slightly hacky solutions that checked the `Symbolic`
nature of the `SVal` region.

In addition to sligthly improving the messages, this commit also
prepares the ground for a follow-up change. I'm planning to introduce an
"indeterminate" state (which needs `va_end` but cannot be otherwise
used) to model the requirements of SEI CERT rule MSC39-C, which states:

> The va_list may be passed as an argument to another function, but
> calling va_arg() within that function causes the va_list to have an
> indeterminate value in the calling function. As a result, attempting
> to read variable arguments without reinitializing the va_list can have
> unexpected behavior.
---
 .../StaticAnalyzer/Checkers/VAListChecker.cpp | 228 +++++++++++-------
 .../Analysis/valist-uninitialized-no-undef.c  |   4 +-
 clang/test/Analysis/valist-uninitialized.c    |  16 +-
 clang/test/Analysis/valist-unterminated.c     |   4 +-
 4 files changed, 149 insertions(+), 103 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp
index fe36e3b879dc0..741be9ef790fd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VAListChecker.cpp
@@ -18,11 +18,36 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
 
 using namespace clang;
 using namespace ento;
+using llvm::formatv;
 
-REGISTER_SET_WITH_PROGRAMSTATE(InitializedVALists, const MemRegion *)
+namespace {
+enum class VAListState {
+  Uninitialized,
+  Unknown,
+  Initialized,
+  Released,
+};
+
+constexpr llvm::StringLiteral StateNames[] = {
+    "uninitialized", "unknown", "initialized", "already released"};
+} // end anonymous namespace
+
+static StringRef describeState(const VAListState S) {
+  return StateNames[static_cast<int>(S)];
+}
+
+REGISTER_MAP_WITH_PROGRAMSTATE(VAListStateMap, const MemRegion *, VAListState)
+
+static VAListState getVAListState(ProgramStateRef State, const MemRegion *Reg) 
{
+  if (const VAListState *Res = State->get<VAListStateMap>(Reg))
+    return *Res;
+  return Reg->getSymbolicBase() ? VAListState::Unknown
+                                : VAListState::Uninitialized;
+}
 
 namespace {
 typedef SmallVector<const MemRegion *, 2> RegionVector;
@@ -48,7 +73,7 @@ class VAListChecker : public Checker<check::PreCall, 
check::PreStmt<VAArgExpr>,
 
 private:
   const MemRegion *getVAListAsRegion(SVal SV, const Expr *VAExpr,
-                                     bool &IsSymbolic, CheckerContext &C) 
const;
+                                     CheckerContext &C) const;
   const ExplodedNode *getStartCallSite(const ExplodedNode *N,
                                        const MemRegion *Reg) const;
 
@@ -57,8 +82,8 @@ class VAListChecker : public Checker<check::PreCall, 
check::PreStmt<VAArgExpr>,
   void reportLeaked(const RegionVector &Leaked, StringRef Msg1, StringRef Msg2,
                     CheckerContext &C, ExplodedNode *N) const;
 
-  void checkVAListStartCall(const CallEvent &Call, CheckerContext &C,
-                            bool IsCopy) const;
+  void checkVAListStartCall(const CallEvent &Call, CheckerContext &C) const;
+  void checkVAListCopyCall(const CallEvent &Call, CheckerContext &C) const;
   void checkVAListEndCall(const CallEvent &Call, CheckerContext &C) const;
 
   class VAListBugVisitor : public BugReporterVisitor {
@@ -118,41 +143,35 @@ const CallDescription 
VAListChecker::VaStart(CDM::CLibrary,
 void VAListChecker::checkPreCall(const CallEvent &Call,
                                  CheckerContext &C) const {
   if (VaStart.matches(Call))
-    checkVAListStartCall(Call, C, false);
+    checkVAListStartCall(Call, C);
   else if (VaCopy.matches(Call))
-    checkVAListStartCall(Call, C, true);
+    checkVAListCopyCall(Call, C);
   else if (VaEnd.matches(Call))
     checkVAListEndCall(Call, C);
   else {
     for (auto FuncInfo : VAListAccepters) {
       if (!FuncInfo.Func.matches(Call))
         continue;
-      bool Symbolic;
       const MemRegion *VAList =
           getVAListAsRegion(Call.getArgSVal(FuncInfo.ParamIndex),
-                            Call.getArgExpr(FuncInfo.ParamIndex), Symbolic, C);
+                            Call.getArgExpr(FuncInfo.ParamIndex), C);
       if (!VAList)
         return;
+      VAListState S = getVAListState(C.getState(), VAList);
 
-      if (C.getState()->contains<InitializedVALists>(VAList))
-        return;
-
-      // We did not see va_start call, but the source of the region is unknown.
-      // Be conservative and assume the best.
-      if (Symbolic)
+      if (S == VAListState::Initialized || S == VAListState::Unknown)
         return;
 
-      SmallString<80> Errmsg("Function '");
-      Errmsg += FuncInfo.Func.getFunctionName();
-      Errmsg += "' is called with an uninitialized va_list argument";
-      reportUninitializedAccess(VAList, Errmsg.c_str(), C);
+      std::string ErrMsg =
+          formatv("Function '{0}' is called with an {1} va_list argument",
+                  FuncInfo.Func.getFunctionName(), describeState(S));
+      reportUninitializedAccess(VAList, ErrMsg, C);
       break;
     }
   }
 }
 
 const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, const Expr *E,
-                                                  bool &IsSymbolic,
                                                   CheckerContext &C) const {
   const MemRegion *Reg = SV.getAsRegion();
   if (!Reg)
@@ -168,7 +187,6 @@ const MemRegion *VAListChecker::getVAListAsRegion(SVal SV, 
const Expr *E,
     if (isa<ParmVarDecl>(DeclReg->getDecl()))
       Reg = C.getState()->getSVal(SV.castAs<Loc>()).getAsRegion();
   }
-  IsSymbolic = Reg && Reg->getBaseRegion()->getAs<SymbolicRegion>();
   // Some VarRegion based VA lists reach here as ElementRegions.
   const auto *EReg = dyn_cast_or_null<ElementRegion>(Reg);
   return (EReg && VAListModelledAsArray) ? EReg->getSuperRegion() : Reg;
@@ -178,52 +196,53 @@ void VAListChecker::checkPreStmt(const VAArgExpr *VAA,
                                  CheckerContext &C) const {
   ProgramStateRef State = C.getState();
   const Expr *ArgExpr = VAA->getSubExpr();
-  SVal VAListSVal = C.getSVal(ArgExpr);
-  bool Symbolic;
-  const MemRegion *VAList = getVAListAsRegion(VAListSVal, ArgExpr, Symbolic, 
C);
+  const MemRegion *VAList = getVAListAsRegion(C.getSVal(ArgExpr), ArgExpr, C);
   if (!VAList)
     return;
-  if (Symbolic)
+  VAListState S = getVAListState(C.getState(), VAList);
+  if (S == VAListState::Initialized || S == VAListState::Unknown)
     return;
-  if (!State->contains<InitializedVALists>(VAList))
-    reportUninitializedAccess(
-        VAList, "va_arg() is called on an uninitialized va_list", C);
+
+  std::string ErrMsg =
+      formatv("va_arg() is called on an {0} va_list", describeState(S));
+  reportUninitializedAccess(VAList, ErrMsg, C);
 }
 
 void VAListChecker::checkDeadSymbols(SymbolReaper &SR,
                                      CheckerContext &C) const {
   ProgramStateRef State = C.getState();
-  InitializedVAListsTy Tracked = State->get<InitializedVALists>();
+  VAListStateMapTy Tracked = State->get<VAListStateMap>();
   RegionVector Leaked;
-  for (const MemRegion *Reg : Tracked) {
+  for (const auto &[Reg, S] : Tracked) {
     if (SR.isLiveRegion(Reg))
       continue;
-    Leaked.push_back(Reg);
-    State = State->remove<InitializedVALists>(Reg);
+    if (S == VAListState::Initialized)
+      Leaked.push_back(Reg);
+    State = State->remove<VAListStateMap>(Reg);
   }
-  if (ExplodedNode *N = C.addTransition(State))
+  if (ExplodedNode *N = C.addTransition(State)) {
     reportLeaked(Leaked, "Initialized va_list", " is leaked", C, N);
+  }
 }
 
 // This function traverses the exploded graph backwards and finds the node 
where
-// the va_list is initialized. That node is used for uniquing the bug paths.
-// It is not likely that there are several different va_lists that belongs to
-// different stack frames, so that case is not yet handled.
+// the va_list becomes initialized. That node is used for uniquing the bug
+// paths. It is not likely that there are several different va_lists that
+// belongs to different stack frames, so that case is not yet handled.
 const ExplodedNode *
 VAListChecker::getStartCallSite(const ExplodedNode *N,
                                 const MemRegion *Reg) const {
   const LocationContext *LeakContext = N->getLocationContext();
   const ExplodedNode *StartCallNode = N;
 
-  bool FoundInitializedState = false;
+  bool SeenInitializedState = false;
 
   while (N) {
-    ProgramStateRef State = N->getState();
-    if (!State->contains<InitializedVALists>(Reg)) {
-      if (FoundInitializedState)
-        break;
-    } else {
-      FoundInitializedState = true;
+    VAListState S = getVAListState(N->getState(), Reg);
+    if (S == VAListState::Initialized) {
+      SeenInitializedState = true;
+    } else if (SeenInitializedState) {
+      break;
     }
     const LocationContext *NContext = N->getLocationContext();
     if (NContext == LeakContext || NContext->isParentOf(LeakContext))
@@ -274,71 +293,85 @@ void VAListChecker::reportLeaked(const RegionVector 
&Leaked, StringRef Msg1,
 }
 
 void VAListChecker::checkVAListStartCall(const CallEvent &Call,
-                                         CheckerContext &C, bool IsCopy) const 
{
-  bool Symbolic;
-  const MemRegion *VAList =
-      getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
-  if (!VAList)
+                                         CheckerContext &C) const {
+  const MemRegion *Arg =
+      getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
+  if (!Arg)
     return;
 
   ProgramStateRef State = C.getState();
+  VAListState ArgState = getVAListState(State, Arg);
 
-  if (IsCopy) {
-    const MemRegion *Arg2 =
-        getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), Symbolic, C);
-    if (Arg2) {
-      if (VAList == Arg2) {
-        RegionVector Leaked{VAList};
-        if (ExplodedNode *N = C.addTransition(State))
-          reportLeaked(Leaked, "va_list", " is copied onto itself", C, N);
-        return;
-      }
-      if (!State->contains<InitializedVALists>(Arg2) && !Symbolic) {
-        if (State->contains<InitializedVALists>(VAList)) {
-          State = State->remove<InitializedVALists>(VAList);
-          RegionVector Leaked{VAList};
-          if (ExplodedNode *N = C.addTransition(State))
-            reportLeaked(Leaked, "Initialized va_list",
-                         " is overwritten by an uninitialized one", C, N);
-        } else {
-          reportUninitializedAccess(Arg2, "Uninitialized va_list is copied", 
C);
-        }
-        return;
-      }
-    }
-  }
-  if (State->contains<InitializedVALists>(VAList)) {
-    RegionVector Leaked{VAList};
+  if (ArgState == VAListState::Initialized) {
+    RegionVector Leaked{Arg};
     if (ExplodedNode *N = C.addTransition(State))
       reportLeaked(Leaked, "Initialized va_list", " is initialized again", C,
                    N);
     return;
   }
 
-  State = State->add<InitializedVALists>(VAList);
+  State = State->set<VAListStateMap>(Arg, VAListState::Initialized);
+  C.addTransition(State);
+}
+
+void VAListChecker::checkVAListCopyCall(const CallEvent &Call,
+                                        CheckerContext &C) const {
+  const MemRegion *Arg1 =
+      getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
+  const MemRegion *Arg2 =
+      getVAListAsRegion(Call.getArgSVal(1), Call.getArgExpr(1), C);
+  if (!Arg1 || !Arg2)
+    return;
+
+  ProgramStateRef State = C.getState();
+  if (Arg1 == Arg2) {
+    RegionVector Leaked{Arg1};
+    if (ExplodedNode *N = C.addTransition(State))
+      reportLeaked(Leaked, "va_list", " is copied onto itself", C, N);
+    return;
+  }
+  VAListState State1 = getVAListState(State, Arg1);
+  VAListState State2 = getVAListState(State, Arg2);
+  // Update the ProgramState by copying the state of Arg2 to Arg1.
+  State = State->set<VAListStateMap>(Arg1, State2);
+  if (State1 == VAListState::Initialized) {
+    RegionVector Leaked{Arg1};
+    std::string Msg2 =
+        formatv(" is overwritten by {0} {1} one",
+                (State2 == VAListState::Initialized) ? "another" : "an",
+                describeState(State2));
+    if (ExplodedNode *N = C.addTransition(State))
+      reportLeaked(Leaked, "Initialized va_list", Msg2, C, N);
+    return;
+  }
+  if (State2 != VAListState::Initialized && State2 != VAListState::Unknown) {
+    std::string Msg = formatv("{0} va_list is copied", describeState(State2));
+    Msg[0] = toupper(Msg[0]);
+    reportUninitializedAccess(Arg2, Msg, C);
+    return;
+  }
   C.addTransition(State);
 }
 
 void VAListChecker::checkVAListEndCall(const CallEvent &Call,
                                        CheckerContext &C) const {
-  bool Symbolic;
-  const MemRegion *VAList =
-      getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), Symbolic, C);
-  if (!VAList)
+  const MemRegion *Arg =
+      getVAListAsRegion(Call.getArgSVal(0), Call.getArgExpr(0), C);
+  if (!Arg)
     return;
 
-  // We did not see va_start call, but the source of the region is unknown.
-  // Be conservative and assume the best.
-  if (Symbolic)
-    return;
+  ProgramStateRef State = C.getState();
+  VAListState ArgState = getVAListState(State, Arg);
 
-  if (!C.getState()->contains<InitializedVALists>(VAList)) {
-    reportUninitializedAccess(
-        VAList, "va_end() is called on an uninitialized va_list", C);
+  if (ArgState != VAListState::Unknown &&
+      ArgState != VAListState::Initialized) {
+    std::string Msg = formatv("va_end() is called on an {0} va_list",
+                              describeState(ArgState));
+    reportUninitializedAccess(Arg, Msg, C);
     return;
   }
-  ProgramStateRef State = C.getState();
-  State = State->remove<InitializedVALists>(VAList);
+  // FIXME: Add a note tag that describes the state change.
+  State = State->set<VAListStateMap>(Arg, VAListState::Released);
   C.addTransition(State);
 }
 
@@ -351,13 +384,26 @@ PathDiagnosticPieceRef 
VAListChecker::VAListBugVisitor::VisitNode(
   if (!S)
     return nullptr;
 
+  VAListState After = getVAListState(State, Reg);
+  VAListState Before = getVAListState(StatePrev, Reg);
+  if (Before == After)
+    return nullptr;
+
   StringRef Msg;
-  if (State->contains<InitializedVALists>(Reg) &&
-      !StatePrev->contains<InitializedVALists>(Reg))
+  switch (After) {
+  case VAListState::Uninitialized:
+    Msg = "Copied uninitialized contents into the va_list";
+    break;
+  case VAListState::Unknown:
+    Msg = "Copied unknown contents into the va_list";
+    break;
+  case VAListState::Initialized:
     Msg = "Initialized va_list";
-  else if (!State->contains<InitializedVALists>(Reg) &&
-           StatePrev->contains<InitializedVALists>(Reg))
+    break;
+  case VAListState::Released:
     Msg = "Ended va_list";
+    break;
+  }
 
   if (Msg.empty())
     return nullptr;
diff --git a/clang/test/Analysis/valist-uninitialized-no-undef.c 
b/clang/test/Analysis/valist-uninitialized-no-undef.c
index b8add29fc4803..da007e677a98d 100644
--- a/clang/test/Analysis/valist-uninitialized-no-undef.c
+++ b/clang/test/Analysis/valist-uninitialized-no-undef.c
@@ -37,7 +37,7 @@ void call_vsprintf_bad(char *buffer, ...) {
   va_list va;
   va_start(va, buffer); // expected-note{{Initialized va_list}}
   va_end(va); // expected-note{{Ended va_list}}
-  vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 
'vsprintf' is called with an uninitialized va_list argument}}
-  // expected-note@-1{{Function 'vsprintf' is called with an uninitialized 
va_list argument}}
+  vsprintf(buffer, "%s %d %d %lf %03d", va); // expected-warning{{Function 
'vsprintf' is called with an already released va_list argument}}
+  // expected-note@-1{{Function 'vsprintf' is called with an already released 
va_list argument}}
 }
 
diff --git a/clang/test/Analysis/valist-uninitialized.c 
b/clang/test/Analysis/valist-uninitialized.c
index 689fc95c5a771..b06c413a2c888 100644
--- a/clang/test/Analysis/valist-uninitialized.c
+++ b/clang/test/Analysis/valist-uninitialized.c
@@ -23,8 +23,8 @@ int f2(int fst, ...) {
   va_list va;
   va_start(va, fst); // expected-note{{Initialized va_list}}
   va_end(va); // expected-note{{Ended va_list}}
-  return va_arg(va, int); // expected-warning{{va_arg() is called on an 
uninitialized va_list}}
-  // expected-note@-1{{va_arg() is called on an uninitialized va_list}}
+  return va_arg(va, int); // expected-warning{{va_arg() is called on an 
already released va_list}}
+  // expected-note@-1{{va_arg() is called on an already released va_list}}
 }
 
 void f3(int fst, ...) {
@@ -60,8 +60,8 @@ void f8(int *fst, ...) {
   va_list *y = &x;
   va_start(*y,fst); // expected-note{{Initialized va_list}}
   va_end(x); // expected-note{{Ended va_list}}
-  (void)va_arg(*y, int); //expected-warning{{va_arg() is called on an 
uninitialized va_list}}
-  // expected-note@-1{{va_arg() is called on an uninitialized va_list}}
+  (void)va_arg(*y, int); //expected-warning{{va_arg() is called on an already 
released va_list}}
+  // expected-note@-1{{va_arg() is called on an already released va_list}}
 }
 
 void reinitOk(int *fst, ...) {
@@ -82,8 +82,8 @@ void reinit3(int *fst, ...) {
   va_start(va, fst); // expected-note{{Initialized va_list}}
   (void)va_arg(va, int);
   va_end(va); // expected-note{{Ended va_list}}
-  (void)va_arg(va, int); //expected-warning{{va_arg() is called on an 
uninitialized va_list}}
-  // expected-note@-1{{va_arg() is called on an uninitialized va_list}}
+  (void)va_arg(va, int); //expected-warning{{va_arg() is called on an already 
released va_list}}
+  // expected-note@-1{{va_arg() is called on an already released va_list}}
 }
 
 void copyUnint(int fst, ...) {
@@ -102,8 +102,8 @@ void g2(int fst, ...) {
   va_list va;
   va_start(va, fst); // expected-note{{Initialized va_list}}
   va_end(va); // expected-note{{Ended va_list}}
-  va_end(va); // expected-warning{{va_end() is called on an uninitialized 
va_list}}
-  // expected-note@-1{{va_end() is called on an uninitialized va_list}}
+  va_end(va); // expected-warning{{va_end() is called on an already released 
va_list}}
+  // expected-note@-1{{va_end() is called on an already released va_list}}
 }
 
 void is_sink(int fst, ...) {
diff --git a/clang/test/Analysis/valist-unterminated.c 
b/clang/test/Analysis/valist-unterminated.c
index cc892684b15fc..e1dbb1ba458c8 100644
--- a/clang/test/Analysis/valist-unterminated.c
+++ b/clang/test/Analysis/valist-unterminated.c
@@ -101,8 +101,8 @@ void recopy(int fst, ...) {
   va_list va, va2;
   va_start(va, fst);
   va_copy(va2, va); // expected-note{{Initialized va_list}}
-  va_copy(va2, va); // expected-warning{{Initialized va_list 'va2' is 
initialized again}}
-  // expected-note@-1{{Initialized va_list 'va2' is initialized again}}
+  va_copy(va2, va); // expected-warning{{Initialized va_list 'va2' is 
overwritten by another initialized one}}
+  // expected-note@-1{{Initialized va_list 'va2' is overwritten by another 
initialized one}}
   va_end(va);
   va_end(va2);
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to