vlad.tsyrklevich updated this revision to Diff 98905.
vlad.tsyrklevich added a comment.
Some stylistic & comment updates.
https://reviews.llvm.org/D30909
Files:
include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
lib/StaticAnalyzer/Core/ProgramState.cpp
lib/StaticAnalyzer/Core/RegionStore.cpp
test/Analysis/taint-generic.c
Index: test/Analysis/taint-generic.c
===================================================================
--- test/Analysis/taint-generic.c
+++ test/Analysis/taint-generic.c
@@ -192,20 +192,41 @@
void testStructArray() {
struct {
- char buf[16];
- struct {
- int length;
- } st[1];
- } tainted;
+ int length;
+ } tainted[4];
- char buffer[16];
+ char dstbuf[16], srcbuf[16];
int sock;
sock = socket(AF_INET, SOCK_STREAM, 0);
- read(sock, &tainted.buf[0], sizeof(tainted.buf));
- read(sock, &tainted.st[0], sizeof(tainted.st));
- // FIXME: tainted.st[0].length should be marked tainted
- __builtin_memcpy(buffer, tainted.buf, tainted.st[0].length); // no-warning
+ __builtin_memset(srcbuf, 0, sizeof(srcbuf));
+
+ read(sock, &tainted[0], sizeof(tainted));
+ __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+
+ __builtin_memset(&tainted, 0, sizeof(tainted));
+ read(sock, &tainted, sizeof(tainted));
+ __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+
+ __builtin_memset(&tainted, 0, sizeof(tainted));
+ // If we taint element 1, we should not raise an alert on taint for element 0 or element 2
+ read(sock, &tainted[1], sizeof(tainted));
+ __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning
+ __builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning
+}
+
+void testUnion() {
+ union {
+ int x;
+ char y[4];
+ } tainted;
+
+ char buffer[4];
+
+ int sock = socket(AF_INET, SOCK_STREAM, 0);
+ read(sock, &tainted.y, sizeof(tainted.y));
+ // FIXME: overlapping regions aren't detected by isTainted yet
+ __builtin_memcpy(buffer, tainted.y, tainted.x);
}
int testDivByZero() {
Index: lib/StaticAnalyzer/Core/RegionStore.cpp
===================================================================
--- lib/StaticAnalyzer/Core/RegionStore.cpp
+++ lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -496,7 +496,10 @@
Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
- return B.getDefaultBinding(R);
+ // Default bindings are always applied over a base region so look up the
+ // base region's default binding, otherwise the lookup will fail when R
+ // is at an offset from R->getBaseRegion().
+ return B.getDefaultBinding(R->getBaseRegion());
}
SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());
Index: lib/StaticAnalyzer/Core/ProgramState.cpp
===================================================================
--- lib/StaticAnalyzer/Core/ProgramState.cpp
+++ lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -644,15 +644,33 @@
if (const Expr *E = dyn_cast_or_null<Expr>(S))
S = E->IgnoreParens();
- SymbolRef Sym = getSVal(S, LCtx).getAsSymbol();
+ return addTaint(getSVal(S, LCtx), Kind);
+}
+
+ProgramStateRef ProgramState::addTaint(SVal V,
+ TaintTagType Kind) const {
+ SymbolRef Sym = V.getAsSymbol();
if (Sym)
return addTaint(Sym, Kind);
- const MemRegion *R = getSVal(S, LCtx).getAsRegion();
- addTaint(R, Kind);
+ // If the SVal represents a structure, try to mass-taint all values within the
+ // structure. For now it only works efficiently on lazy compound values that
+ // were conjured during a conservative evaluation of a function - either as
+ // return values of functions that return structures or arrays by value, or as
+ // values of structures or arrays passed into the function by reference,
+ // directly or through pointer aliasing. Such lazy compound values are
+ // characterized by having exactly one binding in their captured store within
+ // their parent region, which is a conjured symbol default-bound to the base
+ // region of the parent region.
+ if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+ if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
+ if (SymbolRef Sym = binding->getAsSymbol())
+ return addPartialTaint(Sym, LCV->getRegion(), Kind);
+ }
+ }
- // Cannot add taint, so just return the state.
- return this;
+ const MemRegion *R = V.getAsRegion();
+ return addTaint(R, Kind);
}
ProgramStateRef ProgramState::addTaint(const MemRegion *R,
@@ -674,6 +692,28 @@
return NewState;
}
+ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
+ const SubRegion *SubRegion,
+ TaintTagType Kind) const {
+ // Ignore partial taint if the entire parent symbol is already tainted.
+ if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
+ return this;
+
+ // Partial taint applies if only a portion of the symbol is tainted.
+ if (SubRegion == SubRegion->getBaseRegion())
+ return addTaint(ParentSym, Kind);
+
+ TaintedSubRegionsRef TaintedSubRegions(0, TSRFactory.getTreeFactory());
+ if (const TaintedSubRegionsRef *SavedTaintedRegions =
+ get<DerivedSymTaint>(ParentSym))
+ TaintedSubRegions = *SavedTaintedRegions;
+
+ TaintedSubRegions = TaintedSubRegions.add(SubRegion, Kind);
+ ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, TaintedSubRegions);
+ assert(NewState);
+ return NewState;
+}
+
bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind) const {
if (const Expr *E = dyn_cast_or_null<Expr>(S))
@@ -714,31 +754,54 @@
return false;
// Traverse all the symbols this symbol depends on to see if any are tainted.
- bool Tainted = false;
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
SI != SE; ++SI) {
if (!isa<SymbolData>(*SI))
continue;
- const TaintTagType *Tag = get<TaintMap>(*SI);
- Tainted = (Tag && *Tag == Kind);
+ if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
+ if (*Tag == Kind)
+ return true;
+ }
- // If this is a SymbolDerived with a tainted parent, it's also tainted.
- if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
- Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
+ if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
+ // If this is a SymbolDerived with a tainted parent, it's also tainted.
+ if (isTainted(SD->getParentSymbol(), Kind))
+ return true;
+
+ // If this is a SymbolDerived with the same parent symbol as another
+ // tainted SymbolDerived and a region that's a sub-region of that tainted
+ // symbol, it's also tainted.
+ if (const TaintedSubRegionsRef *SymRegions =
+ get<DerivedSymTaint>(SD->getParentSymbol())) {
+ const TypedValueRegion *R = SD->getRegion();
+ for (TaintedSubRegionsRef::iterator I = SymRegions->begin(),
+ E = SymRegions->end();
+ I != E; ++I) {
+ // FIXME: The logic to identify tainted regions could be more
+ // complete. For example, this would not currently identify
+ // overlapping fields in a union as tainted. To identify this we can
+ // check for overlapping/nested byte offsets.
+ if (Kind == I->second &&
+ (R == I->first || R->isSubRegionOf(I->first)))
+ return true;
+ }
+ }
+ }
// If memory region is tainted, data is also tainted.
- if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
- Tainted = Tainted || isTainted(SRV->getRegion(), Kind);
-
- // If If this is a SymbolCast from a tainted value, it's also tainted.
- if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI))
- Tainted = Tainted || isTainted(SC->getOperand(), Kind);
+ if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
+ if (isTainted(SRV->getRegion(), Kind))
+ return true;
+ }
- if (Tainted)
- return true;
+ // If this is a SymbolCast from a tainted value, it's also tainted.
+ if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
+ if (isTainted(SC->getOperand(), Kind))
+ return true;
+ }
}
- return Tainted;
+ return false;
}
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -65,21 +65,8 @@
/// and thus, is tainted.
static bool isStdin(const Expr *E, CheckerContext &C);
- /// This is called from getPointedToSymbol() to resolve symbol references for
- /// the region underlying a LazyCompoundVal. This is the default binding
- /// for the LCV, which could be a conjured symbol from a function call that
- /// initialized the region. It only returns the conjured symbol if the LCV
- /// covers the entire region, e.g. we avoid false positives by not returning
- /// a default bindingc for an entire struct if the symbol for only a single
- /// field or element within it is requested.
- // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so
- // that they are also appropriately tainted.
- static SymbolRef getLCVSymbol(CheckerContext &C,
- nonloc::LazyCompoundVal &LCV);
-
- /// \brief Given a pointer argument, get the symbol of the value it contains
- /// (points to).
- static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
+ /// \brief Given a pointer argument, return the value it points to.
+ static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
/// Functions defining the attack surface.
typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
@@ -186,9 +173,14 @@
static inline bool isTaintedOrPointsToTainted(const Expr *E,
ProgramStateRef State,
CheckerContext &C) {
- return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
- (E->getType().getTypePtr()->isPointerType() &&
- State->isTainted(getPointedToSymbol(C, E))));
+ if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
+ return true;
+
+ if (!E->getType().getTypePtr()->isPointerType())
+ return false;
+
+ Optional<SVal> V = getPointedToSVal(C, E);
+ return (V && State->isTainted(*V));
}
/// \brief Pre-process a function which propagates taint according to the
@@ -400,9 +392,9 @@
if (CE->getNumArgs() < (ArgNum + 1))
return false;
const Expr* Arg = CE->getArg(ArgNum);
- SymbolRef Sym = getPointedToSymbol(C, Arg);
- if (Sym)
- State = State->addTaint(Sym);
+ Optional<SVal> V = getPointedToSVal(C, Arg);
+ if (V)
+ State = State->addTaint(*V);
}
// Clear up the taint info from the state.
@@ -473,47 +465,20 @@
return false;
}
-SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
- nonloc::LazyCompoundVal &LCV) {
- StoreManager &StoreMgr = C.getStoreManager();
-
- // getLCVSymbol() is reached in a PostStmt so we can always expect a default
- // binding to exist if one is present.
- if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) {
- SymbolRef Sym = binding->getAsSymbol();
- if (!Sym)
- return nullptr;
-
- // If the LCV covers an entire base region return the default conjured symbol.
- if (LCV.getRegion() == LCV.getRegion()->getBaseRegion())
- return Sym;
- }
-
- // Otherwise, return a nullptr as there's not yet a functional way to taint
- // sub-regions of LCVs.
- return nullptr;
-}
-
-SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
- const Expr* Arg) {
+Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
+ const Expr* Arg) {
ProgramStateRef State = C.getState();
SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
if (AddrVal.isUnknownOrUndef())
- return nullptr;
+ return None;
Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
if (!AddrLoc)
- return nullptr;
+ return None;
const PointerType *ArgTy =
dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
- SVal Val = State->getSVal(*AddrLoc,
- ArgTy ? ArgTy->getPointeeType(): QualType());
-
- if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
- return getLCVSymbol(C, *LCV);
-
- return Val.getAsSymbol();
+ return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
}
ProgramStateRef
@@ -633,9 +598,9 @@
// The arguments are pointer arguments. The data they are pointing at is
// tainted after the call.
const Expr* Arg = CE->getArg(i);
- SymbolRef Sym = getPointedToSymbol(C, Arg);
- if (Sym)
- State = State->addTaint(Sym);
+ Optional<SVal> V = getPointedToSVal(C, Arg);
+ if (V)
+ State = State->addTaint(*V);
}
return State;
}
@@ -710,10 +675,10 @@
// Check for taint.
ProgramStateRef State = C.getState();
- const SymbolRef PointedToSym = getPointedToSymbol(C, E);
+ Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
SVal TaintedSVal;
- if (State->isTainted(PointedToSym))
- TaintedSVal = nonloc::SymbolVal(PointedToSym);
+ if (PointedToSVal && State->isTainted(*PointedToSVal))
+ TaintedSVal = *PointedToSVal;
else if (State->isTainted(E, C.getLocationContext()))
TaintedSVal = C.getSVal(E);
else
Index: include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
===================================================================
--- include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
+++ include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
@@ -35,6 +35,16 @@
static void *GDMIndex() { static int index = 0; return &index; }
};
+/// The GDM component mapping derived symbols' parent symbols to their
+/// underlying regions. This is used to efficiently check whether a symbol is
+/// tainted when it represents a sub-region of a tainted symbol.
+struct DerivedSymTaint {};
+typedef llvm::ImmutableMap<SymbolRef, TaintedSubRegionsRef> DerivedSymTaintImpl;
+template<> struct ProgramStateTrait<DerivedSymTaint>
+ : public ProgramStatePartialTrait<DerivedSymTaintImpl> {
+ static void *GDMIndex() { static int index; return &index; }
+};
+
class TaintManager {
TaintManager() {}
Index: include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
===================================================================
--- include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -43,6 +43,9 @@
ProgramStateManager &, SubEngine *);
typedef std::unique_ptr<StoreManager>(*StoreManagerCreator)(
ProgramStateManager &);
+typedef llvm::ImmutableMap<const SubRegion*, TaintTagType> TaintedSubRegions;
+typedef llvm::ImmutableMapRef<const SubRegion*, TaintTagType>
+ TaintedSubRegionsRef;
//===----------------------------------------------------------------------===//
// ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState.
@@ -87,6 +90,7 @@
Store store; // Maps a location to its current value.
GenericDataMap GDM; // Custom data stored by a client of this class.
unsigned refCount;
+ TaintedSubRegions::Factory TSRFactory;
/// makeWithStore - Return a ProgramState with the same values as the current
/// state with the exception of using the specified Store.
@@ -343,14 +347,25 @@
ProgramStateRef addTaint(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric) const;
+ /// Create a new state in which the value is marked as tainted.
+ ProgramStateRef addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;
+
/// Create a new state in which the symbol is marked as tainted.
ProgramStateRef addTaint(SymbolRef S,
TaintTagType Kind = TaintTagGeneric) const;
/// Create a new state in which the region symbol is marked as tainted.
ProgramStateRef addTaint(const MemRegion *R,
TaintTagType Kind = TaintTagGeneric) const;
+ /// Create a new state in a which a sub-region of a given symbol is tainted.
+ /// This might be necessary when referring to regions that can not have an
+ /// individual symbol, e.g. if they are represented by the default binding of
+ /// a LazyCompoundVal.
+ ProgramStateRef addPartialTaint(SymbolRef ParentSym,
+ const SubRegion *SubRegion,
+ TaintTagType Kind = TaintTagGeneric) const;
+
/// Check if the statement is tainted in the current state.
bool isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind = TaintTagGeneric) const;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits