Just a few comments inline.. On Fri, Aug 5, 2011 at 4:18 PM, Kaelyn Uhrain <[email protected]> wrote:
> Author: rikka > Date: Fri Aug 5 18:18:04 2011 > New Revision: 136997 > > URL: http://llvm.org/viewvc/llvm-project?rev=136997&view=rev > Log: > Perform array bounds checking in more situations and properly handle > special > case situations with the unary operators & and *. Also extend the array > bounds > checking to work with pointer arithmetic; the pointer arithemtic checking > can > be turned on using -Warray-bounds-pointer-arithmetic. > > The changes to where CheckArrayAccess gets called is based on some trial & > error and a bunch of digging through source code and gdb backtraces in > order > to have the check performed under as many situations as possible (such as > for > variable initializers, arguments to function calls, and within conditional > in > addition to the simpler cases of the operands to binary and unary operator) > while not being called--and triggering warnings--more than once for a given > ArraySubscriptExpr. > > Modified: > cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td > cfe/trunk/include/clang/Sema/Sema.h > cfe/trunk/lib/Sema/SemaChecking.cpp > cfe/trunk/lib/Sema/SemaExpr.cpp > cfe/trunk/lib/Sema/SemaExprCXX.cpp > cfe/trunk/test/SemaCXX/array-bounds.cpp > > Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=136997&r1=136996&r2=136997&view=diff > > ============================================================================== > --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original) > +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Aug 5 > 18:18:04 2011 > @@ -4085,13 +4085,19 @@ > def err_defaulted_move_unsupported : Error< > "defaulting move functions not yet supported">; > > +def warn_ptr_arith_precedes_bounds : Warning< > + "the pointer decremented by %0 refers before the beginning of the > array">, > + InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore; > +def warn_ptr_arith_exceeds_bounds : Warning< > + "the pointer incremented by %0 refers past the end of the array (that " > + "contains %1 element%s2)">, > + InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore; > def warn_array_index_precedes_bounds : Warning< > "array index of '%0' indexes before the beginning of the array">, > InGroup<DiagGroup<"array-bounds">>; > def warn_array_index_exceeds_bounds : Warning< > "array index of '%0' indexes past the end of an array (that contains %1 " > - "%select{element|elements}2)">, > - InGroup<DiagGroup<"array-bounds">>; > + "element%s2)">, InGroup<DiagGroup<"array-bounds">>; > def note_array_index_out_of_bounds : Note< > "array %0 declared here">; > > > Modified: cfe/trunk/include/clang/Sema/Sema.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=136997&r1=136996&r2=136997&view=diff > > ============================================================================== > --- cfe/trunk/include/clang/Sema/Sema.h (original) > +++ cfe/trunk/include/clang/Sema/Sema.h Fri Aug 5 18:18:04 2011 > @@ -5948,6 +5948,8 @@ > unsigned ByteNo) const; > > private: > + void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, > + bool isSubscript=false, bool > AllowOnePastEnd=true); > void CheckArrayAccess(const Expr *E); > bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); > bool CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall); > > Modified: cfe/trunk/lib/Sema/SemaChecking.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=136997&r1=136996&r2=136997&view=diff > > ============================================================================== > --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) > +++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Aug 5 18:18:04 2011 > @@ -3370,6 +3370,11 @@ > if (E->isTypeDependent() || E->isValueDependent()) > return; > > + // Check for array bounds violations in cases where the check isn't > triggered > + // elsewhere for other Expr types (like BinaryOperators), e.g. when an > + // ArraySubscriptExpr is on the RHS of a variable initialization. > + CheckArrayAccess(E); > + > // This is not the right CC for (e.g.) a variable initialization. > AnalyzeImplicitConversions(*this, E, CC); > } > @@ -3474,6 +3479,15 @@ > << TRange << Op->getSourceRange(); > } > > +static const Type* getElementType(const Expr *BaseExpr) { > + const Type* EltType = BaseExpr->getType().getTypePtr(); > + if (EltType->isAnyPointerType()) > + return EltType->getPointeeType().getTypePtr(); > + else if (EltType->isArrayType()) > + return EltType->getBaseElementTypeUnsafe(); > + return EltType; > +} > I wonder if this would make sense to add to the AST itself... > + > /// \brief Check whether this array fits the idiom of a size-one tail > padded > /// array member of a struct. > /// > @@ -3510,19 +3524,21 @@ > return false; > } > > -static void CheckArrayAccess_Check(Sema &S, > - const clang::ArraySubscriptExpr *E) { > - const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts(); > +void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, > + bool isSubscript, bool AllowOnePastEnd) { > + const Type* EffectiveType = getElementType(BaseExpr); > + BaseExpr = BaseExpr->IgnoreParenCasts(); > + IndexExpr = IndexExpr->IgnoreParenCasts(); > + > const ConstantArrayType *ArrayTy = > - S.Context.getAsConstantArrayType(BaseExpr->getType()); > + Context.getAsConstantArrayType(BaseExpr->getType()); > if (!ArrayTy) > return; > > - const Expr *IndexExpr = E->getIdx(); > if (IndexExpr->isValueDependent()) > return; > llvm::APSInt index; > - if (!IndexExpr->isIntegerConstantExpr(index, S.Context)) > + if (!IndexExpr->isIntegerConstantExpr(index, Context)) > return; > > const NamedDecl *ND = NULL; > @@ -3535,47 +3551,94 @@ > llvm::APInt size = ArrayTy->getSize(); > if (!size.isStrictlyPositive()) > return; > + > + const Type* BaseType = getElementType(BaseExpr); > + if (!isSubscript && BaseType != EffectiveType) { > + // Make sure we're comparing apples to apples when comparing index > to size > + uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType); > + uint64_t array_typesize = Context.getTypeSize(BaseType); > It would be good to use more LLVM-style local variable names such as 'PtrArithTypeSize' > + if (ptrarith_typesize != array_typesize) { > + // There's a cast to a different size type involved > + uint64_t ratio = array_typesize / ptrarith_typesize; > + // TODO: Be smarter about handling cases where array_typesize is > not a > + // multiple of ptrarith_typesize > + if (ptrarith_typesize * ratio == array_typesize) > + size *= llvm::APInt(size.getBitWidth(), ratio); > + } > + } > + > if (size.getBitWidth() > index.getBitWidth()) > index = index.sext(size.getBitWidth()); > else if (size.getBitWidth() < index.getBitWidth()) > size = size.sext(index.getBitWidth()); > > - // Don't warn for valid indexes > - if (index.slt(size)) > + // For array subscripting the index must be less than size, but for > pointer > + // arithmetic also allow the index (offset) to be equal to size since > + // computing the next address after the end of the array is legal and > + // commonly done e.g. in C++ iterators and range-based for loops. > This doesn't seem to match the logic below, which appears (but perhaps I'm just misreading it) to allow one-past-the-end for code like "&foo[N]"... But maybe I've just misread the code below. > + if (AllowOnePastEnd ? index.sle(size) : index.slt(size)) > return; > > // Also don't warn for arrays of size 1 which are members of some > // structure. These are often used to approximate flexible arrays in > C89 > // code. > - if (IsTailPaddedMemberArray(S, size, ND)) > + if (IsTailPaddedMemberArray(*this, size, ND)) > return; > > - S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, > - S.PDiag(diag::warn_array_index_exceeds_bounds) > - << index.toString(10, true) > - << size.toString(10, true) > - << (unsigned)size.ugt(1) > - << IndexExpr->getSourceRange()); > + unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds; > + if (isSubscript) > + DiagID = diag::warn_array_index_exceeds_bounds; > + > + DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr, > + PDiag(DiagID) << index.toString(10, true) > + << size.toString(10, true) > + << (unsigned)size.getLimitedValue(~0U) > + << IndexExpr->getSourceRange()); > } else { > - S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, > - S.PDiag(diag::warn_array_index_precedes_bounds) > - << index.toString(10, true) > - << IndexExpr->getSourceRange()); > + unsigned DiagID = diag::warn_array_index_precedes_bounds; > + if (!isSubscript) { > + DiagID = diag::warn_ptr_arith_precedes_bounds; > + if (index.isNegative()) index = -index; > + } > + > + DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr, > + PDiag(DiagID) << index.toString(10, true) > + << IndexExpr->getSourceRange()); > } > > if (ND) > - S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, > - S.PDiag(diag::note_array_index_out_of_bounds) > - << ND->getDeclName()); > + DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, > + PDiag(diag::note_array_index_out_of_bounds) > + << ND->getDeclName()); > } > > void Sema::CheckArrayAccess(const Expr *expr) { > - while (true) { > - expr = expr->IgnoreParens(); > + int AllowOnePastEnd = 0; > + while (expr) { > + expr = expr->IgnoreParenImpCasts(); > switch (expr->getStmtClass()) { > - case Stmt::ArraySubscriptExprClass: > - CheckArrayAccess_Check(*this, cast<ArraySubscriptExpr>(expr)); > + case Stmt::ArraySubscriptExprClass: { > + const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(expr); > + CheckArrayAccess(ASE->getBase(), ASE->getIdx(), true, > + AllowOnePastEnd > 0); > return; > + } > + case Stmt::UnaryOperatorClass: { > + // Only unwrap the * and & unary operators > + const UnaryOperator *UO = cast<UnaryOperator>(expr); > + expr = UO->getSubExpr(); > + switch (UO->getOpcode()) { > + case UO_AddrOf: > + AllowOnePastEnd++; > + break; > + case UO_Deref: > + AllowOnePastEnd--; > + break; > + default: > + return; > + } > + break; > + } > case Stmt::ConditionalOperatorClass: { > const ConditionalOperator *cond = cast<ConditionalOperator>(expr); > if (const Expr *lhs = cond->getLHS()) > > Modified: cfe/trunk/lib/Sema/SemaExpr.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=136997&r1=136996&r2=136997&view=diff > > ============================================================================== > --- cfe/trunk/lib/Sema/SemaExpr.cpp (original) > +++ cfe/trunk/lib/Sema/SemaExpr.cpp Fri Aug 5 18:18:04 2011 > @@ -322,6 +322,7 @@ > // A glvalue of a non-function, non-array type T can be > // converted to a prvalue. > if (!E->isGLValue()) return Owned(E); > + > QualType T = E->getType(); > assert(!T.isNull() && "r-value conversion on typeless expression?"); > > @@ -364,8 +365,6 @@ > // type of the lvalue. > if (T.hasQualifiers()) > T = T.getUnqualifiedType(); > - > - CheckArrayAccess(E); > > return Owned(ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, > E, 0, VK_RValue)); > @@ -3397,6 +3396,12 @@ > > Arg = ArgExpr.takeAs<Expr>(); > } > + > + // Check for array bounds violations for each argument to the call. > This > + // check only triggers warnings when the argument isn't a more complex > Expr > + // with its own checking, such as a BinaryOperator. > + CheckArrayAccess(Arg); > + > AllArgs.push_back(Arg); > } > > @@ -5935,6 +5940,9 @@ > return QualType(); > } > > + // Check array bounds for pointer arithemtic > + CheckArrayAccess(PExp, IExp); > + > if (CompLHSTy) { > QualType LHSTy = Context.isPromotableBitField(lex.get()); > if (LHSTy.isNull()) { > @@ -5991,6 +5999,12 @@ > if (!checkArithmeticOpPointerOperand(*this, Loc, lex.get())) > return QualType(); > > + Expr *IExpr = rex.get()->IgnoreParenCasts(); > + UnaryOperator negRex(IExpr, UO_Minus, IExpr->getType(), VK_RValue, > Variables should be CamelCased. Also, maybe 'NegatedRHS' would be more clear? A bit unfortunate we're already using lex/rex here. > + OK_Ordinary, IExpr->getExprLoc()); > + // Check array bounds for pointer arithemtic > + CheckArrayAccess(lex.get()->IgnoreParenCasts(), &negRex); > + > if (CompLHSTy) *CompLHSTy = lex.get()->getType(); > return lex.get()->getType(); > } > @@ -6984,9 +6998,7 @@ > return QualType(); > > CheckForNullPointerDereference(*this, LHS); > - // Check for trivial buffer overflows. > - CheckArrayAccess(LHS->IgnoreParenCasts()); > - > + > // C99 6.5.16p3: The type of an assignment expression is the type of the > // left operand unless the left operand has qualified type, in which case > // it is the unqualified version of the type of the left operand. > @@ -7721,6 +7733,11 @@ > } > if (ResultTy.isNull() || lhs.isInvalid() || rhs.isInvalid()) > return ExprError(); > + > + // Check for array bounds violations for both sides of the > BinaryOperator > + CheckArrayAccess(lhs.get()); > + CheckArrayAccess(rhs.get()); > + > if (CompResultTy.isNull()) > return Owned(new (Context) BinaryOperator(lhs.take(), rhs.take(), Opc, > ResultTy, VK, OK, OpLoc)); > @@ -8071,6 +8088,13 @@ > if (resultType.isNull() || Input.isInvalid()) > return ExprError(); > > + // Check for array bounds violations in the operand of the > UnaryOperator, > + // except for the '*' and '&' operators that have to be handled > specially > + // by CheckArrayAccess (as there are special cases like > &array[arraysize] > + // that are explicitly defined as valid by the standard). > + if (Opc != UO_AddrOf && Opc != UO_Deref) > + CheckArrayAccess(Input.get()); > + > return Owned(new (Context) UnaryOperator(Input.take(), Opc, resultType, > VK, OK, OpLoc)); > } > > Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=136997&r1=136996&r2=136997&view=diff > > ============================================================================== > --- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original) > +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Aug 5 18:18:04 2011 > @@ -2264,9 +2264,6 @@ > if (!From->isGLValue()) break; > } > > - // Check for trivial buffer overflows. > - CheckArrayAccess(From); > - > FromType = FromType.getUnqualifiedType(); > From = ImplicitCastExpr::Create(Context, FromType, CK_LValueToRValue, > From, 0, VK_RValue); > > Modified: cfe/trunk/test/SemaCXX/array-bounds.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/array-bounds.cpp?rev=136997&r1=136996&r2=136997&view=diff > > ============================================================================== > --- cfe/trunk/test/SemaCXX/array-bounds.cpp (original) > +++ cfe/trunk/test/SemaCXX/array-bounds.cpp Fri Aug 5 18:18:04 2011 > @@ -37,11 +37,16 @@ > s2.a[3] = 0; // no warning for 0-sized array > > union { > - short a[2]; // expected-note {{declared here}} > + short a[2]; // expected-note 4 {{declared here}} > char c[4]; > } u; > u.a[3] = 1; // expected-warning {{array index of '3' indexes past the end > of an array (that contains 2 elements)}} > u.c[3] = 1; // no warning > + short *p = &u.a[2]; // no warning > + p = &u.a[3]; // expected-warning {{array index of '3' indexes past the > end of an array (that contains 2 elements)}} > + *(&u.a[2]) = 1; // expected-warning {{array index of '2' indexes past > the end of an array (that contains 2 elements)}} > + *(&u.a[3]) = 1; // expected-warning {{array index of '3' indexes past > the end of an array (that contains 2 elements)}} > + *(&u.c[3]) = 1; // no warning > > const int const_subscript = 3; > int array[2]; // expected-note {{declared here}} > @@ -153,8 +158,7 @@ > enum enumA { enumA_A, enumA_B, enumA_C, enumA_D, enumA_E }; > enum enumB { enumB_X, enumB_Y, enumB_Z }; > static enum enumB myVal = enumB_X; > -void test_nested_switch() > -{ > +void test_nested_switch() { > switch (enumA_E) { // expected-warning {{no case matching constant}} > switch (myVal) { // expected-warning {{enumeration values 'enumB_X' and > 'enumB_Z' not handled in switch}} > case enumB_Y: ; > @@ -199,3 +203,14 @@ > B->c[3]; // expected-warning {{array index of '3' indexes past > the end of an array (that contains 1 element)}} > } > } > + > +void bar(int x) {} > +int test_more() { > + int foo[5]; // expected-note 5 {{array 'foo' declared here}} > + bar(foo[5]); // expected-warning {{array index of '5' indexes past the > end of an array (that contains 5 elements)}} > + ++foo[5]; // expected-warning {{array index of '5' indexes past the end > of an array (that contains 5 elements)}} > + if (foo[6]) // expected-warning {{array index of '6' indexes past the > end of an array (that contains 5 elements)}} > + return --foo[6]; // expected-warning {{array index of '6' indexes past > the end of an array (that contains 5 elements)}} > + else > + return foo[5]; // expected-warning {{array index of '5' indexes past > the end of an array (that contains 5 elements)}} > +} > > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
