Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71585.
meikeb added a comment.

.


https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager , const LangOptions ,
+  const TargetInfo , 

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71584.
meikeb added a comment.

Rebase


https://reviews.llvm.org/D24584

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGBlocks.h
  lib/Sema/SemaChecking.cpp
  test/CodeGenObjCXX/lambda-expressions.mm
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: test/CodeGenObjCXX/lambda-expressions.mm
===
--- test/CodeGenObjCXX/lambda-expressions.mm
+++ test/CodeGenObjCXX/lambda-expressions.mm
@@ -4,6 +4,8 @@
 typedef int (^fp)();
 fp f() { auto x = []{ return 3; }; return x; }
 
+// ARC: %[[LAMBDACLASS:.*]] = type { i32 }
+
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [5 x i8] c"copy\00"
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [12 x i8] c"autorelease\00"
 // MRC-LABEL: define i32 ()* @_Z1fv(
@@ -60,6 +62,40 @@
 }
 @end
 
+// ARC: define void @_ZN13LambdaCapture4foo1ERi(i32* dereferenceable(4) %{{.*}})
+// ARC:   %[[CAPTURE0:.*]] = getelementptr inbounds %[[LAMBDACLASS]], %[[LAMBDACLASS]]* %{{.*}}, i32 0, i32 0
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE0]]
+
+// ARC: define internal void @"_ZZN13LambdaCapture4foo1ERiENK3$_3clEv"(%[[LAMBDACLASS]]* %{{.*}})
+// ARC:   %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>
+// ARC:   %[[CAPTURE1:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %[[BLOCK]], i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE1]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke"
+// ARC:   %[[CAPTURE2:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE2]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke_2"(i8* %{{.*}})
+// ARC:   %[[CAPTURE3:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   %[[V1:.*]] = load i32, i32* %[[CAPTURE3]]
+// ARC:   store i32 %[[V1]], i32* @_ZN13LambdaCapture1iE
+
+namespace LambdaCapture {
+  int i;
+  void foo1(int ) {
+auto lambda = [a]{
+  auto block1 = ^{
+auto block2 = ^{
+  i = a;
+};
+block2();
+  };
+  block1();
+};
+lambda();
+  }
+}
+
 // ARC-LABEL: define linkonce_odr i32 ()* @_ZZNK13StaticMembersIfE1fMUlvE_clEvENKUlvE_cvU13block_pointerFivEEv
 
 // Check lines for BlockInLambda test below
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = 

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71583.
meikeb added a comment.

Try to drop randomly uploaded commit.


https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager , const 

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71581.
meikeb added a comment.

Rebase to current commit.


https://reviews.llvm.org/D24584

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGBlocks.h
  lib/Sema/SemaChecking.cpp
  test/CodeGenObjCXX/lambda-expressions.mm
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: test/CodeGenObjCXX/lambda-expressions.mm
===
--- test/CodeGenObjCXX/lambda-expressions.mm
+++ test/CodeGenObjCXX/lambda-expressions.mm
@@ -4,6 +4,8 @@
 typedef int (^fp)();
 fp f() { auto x = []{ return 3; }; return x; }
 
+// ARC: %[[LAMBDACLASS:.*]] = type { i32 }
+
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [5 x i8] c"copy\00"
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [12 x i8] c"autorelease\00"
 // MRC-LABEL: define i32 ()* @_Z1fv(
@@ -60,6 +62,40 @@
 }
 @end
 
+// ARC: define void @_ZN13LambdaCapture4foo1ERi(i32* dereferenceable(4) %{{.*}})
+// ARC:   %[[CAPTURE0:.*]] = getelementptr inbounds %[[LAMBDACLASS]], %[[LAMBDACLASS]]* %{{.*}}, i32 0, i32 0
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE0]]
+
+// ARC: define internal void @"_ZZN13LambdaCapture4foo1ERiENK3$_3clEv"(%[[LAMBDACLASS]]* %{{.*}})
+// ARC:   %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>
+// ARC:   %[[CAPTURE1:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %[[BLOCK]], i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE1]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke"
+// ARC:   %[[CAPTURE2:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE2]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke_2"(i8* %{{.*}})
+// ARC:   %[[CAPTURE3:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   %[[V1:.*]] = load i32, i32* %[[CAPTURE3]]
+// ARC:   store i32 %[[V1]], i32* @_ZN13LambdaCapture1iE
+
+namespace LambdaCapture {
+  int i;
+  void foo1(int ) {
+auto lambda = [a]{
+  auto block1 = ^{
+auto block2 = ^{
+  i = a;
+};
+block2();
+  };
+  block1();
+};
+lambda();
+  }
+}
+
 // ARC-LABEL: define linkonce_odr i32 ()* @_ZZNK13StaticMembersIfE1fMUlvE_clEvENKUlvE_cvU13block_pointerFivEEv
 
 // Check lines for BlockInLambda test below
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned 

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb added a comment.

This is the same as https://reviews.llvm.org/D23820 besides that I added myself 
in the commit message as "Patch by". https://reviews.llvm.org/D23820 was 
reverted in https://reviews.llvm.org/D24579 because srhines' commit took 
authorship of this patch.


https://reviews.llvm.org/D24584



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb created this revision.
meikeb added a reviewer: rsmith.
meikeb added subscribers: cfe-commits, srhines.

The warning for a format string not being a sting literal and therefore
being potentially insecure is overly strict for indecies into sting
literals. This fix checks if the index into the string literal is
precomputable. If thats the case it will check if the suffix of that
sting literal is a valid format string string literal. It will still
issue the aforementioned warning for out of range indecies into the
string literal.

Patch by Meike Baumgärtner (meikeb)

https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb marked an inline comment as done.
meikeb added a comment.

Thanks for reviewing my patch! It would be great if someone could submit this 
patch for me.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb marked an inline comment as done.


Comment at: lib/Sema/SemaChecking.cpp:3864-3867
@@ +3863,6 @@
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }

This is way better. Thank you.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb marked 2 inline comments as done.
meikeb added a comment.

https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71284.
meikeb added a comment.

Try to improve offset sum helper function name and fix style issues.


https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  else
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation 

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb added a comment.

I explained why I chose the names that you commented on. Feel free to add your 
thoughts if you still think another name would be more fitting.



Comment at: lib/Sema/SemaChecking.cpp:3842
@@ -3841,2 +3841,3 @@
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumUpStringLiteralOffset(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,

srhines wrote:
> Is "computeStringLiteralOffset" or "calculate..." a better name here?
I thought about that but decided to go with sumUp because compute or calculate 
sounds like this function would do what we actually do what the caller of this 
function does (computing the offset). This is just a nice helper to sum up the 
offset we already have with another piece of offset.


Comment at: lib/Sema/SemaChecking.cpp:3844
@@ +3843,3 @@
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();

srhines wrote:
> Is "Operand" better than "Addend"? In particular, there is the possibility 
> that we do subtraction of the value instead of addition, so "Addend" makes it 
> a bit confusing. Of course, I then would expect "OperandIsRight" instead of 
> "AddendIsRight" too.
Clang summarizes sub and add as "additive" operands. This is why I think this 
is fitting. Operand is misleading because it includes a lot more operands than 
add and sub imo.


Comment at: lib/Sema/SemaChecking.cpp:3860
@@ +3859,3 @@
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;

srhines wrote:
> Ov -> Overflow
I named that in compliance with clang naming. E.g. sadd_ov. It is common in 
this file to abbreviate variable names with 1-3 characters.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71199.
meikeb marked 7 inline comments as done.
meikeb added a comment.

Fix typos and add assert to sum up offset helper.


https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,94 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumUpStringLiteralOffset(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  else
+assert(false && "operator must be add or sub with addend on the right");
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumUpStringLiteralOffset(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Meike Baumgärtner via cfe-commits
meikeb marked 6 inline comments as done.
meikeb added a comment.

Thanks for taking the time and doing these great reviews! Really appreciated!



Comment at: lib/Sema/SemaChecking.cpp:4143-4150
@@ -4049,3 +4142,10 @@
 if (StrE) {
-  CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
+  if (Offset.isNegative() || Offset > StrE->getLength()) {
+// TODO: It would be better to have an explicit warning for out of
+// bounds literals.
+return SLCT_NotALiteral;
+  }
+  FormatStringLiteral FStr =
+  FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue());
+  CheckFormatString(S, , E, Args, HasVAListArg, format_idx,
 firstDataArg, Type, InFunctionCall, CallType,

The = case is part of a different warning. It's checked in CheckFormatString.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Meike Baumgärtner via cfe-commits
meikeb marked 4 inline comments as done.


Comment at: lib/Sema/SemaChecking.cpp:4166-4167
@@ +4165,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = false;
+  bool RIsInt = false;
+  // Prevent asserts triggering in EvaluateAsInt by checking if we deal 
with

I hope this additional check fixes this issue. As far as I read the code there 
were none such asserts in isIntegerConstantExpr(). Thanks for explaining this!


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71043.
meikeb added a comment.

Fix the mentioned issues.


https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,92 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void reckonUpOffset(llvm::APSInt , llvm::APSInt Addend,
+   BinaryOperatorKind BinOpKind, bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Align the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+
+  // We add a offset to a pointer here so we should support a offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+reckonUpOffset(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when retruning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return StringRef(FExpr->getString().data() + Offset,
+ FExpr->getLength() - Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager , const LangOptions ,
+  const TargetInfo , unsigned *StartToken = nullptr,
+  

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-26 Thread Meike Baumgärtner via cfe-commits
meikeb added inline comments.


Comment at: lib/Sema/SemaChecking.cpp:4089-4090
@@ +4088,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
+  bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
+

rsmith wrote:
> What happens if one of these expressions is value-dependent? The evaluator 
> can crash or assert if given a value-dependent expression. If we don't defer 
> these checks in dependent contexts, you'll need to handle that possibility 
> somehow.
> 
> Example:
> 
>   template void f(const char *p) {
> printf("blah blah %s" + N, p);
>   }
I think I don't understand what you are trying to tell me. Especially the 
example you provided does just fine and behaves as I expected. As far as I 
followed EvaluateAsInt it does not assert but returns false if we don't get a 
constexpr here. We warn under -Wformat-nonliteral for value-dependent string 
literals.

Could you explain this more or provide an example that triggers an assert or 
explain what behavior is wrong regarding the provided example? Thanks! 


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-23 Thread Meike Baumgärtner via cfe-commits
meikeb marked an inline comment as done.


Comment at: lib/Sema/SemaChecking.cpp:3856
@@ -3855,3 +3855,3 @@
 // format string, we will usually need to emit a warning.
 // True string literals are then checked by CheckFormatString.
 static StringLiteralCheckType

I'm not sure if that should be mentioned here because it is a very high level 
comment and the suffix of a string literal is a string literal itself.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-23 Thread Meike Baumgärtner via cfe-commits
meikeb created this revision.
meikeb added a reviewer: rsmith.
meikeb added a subscriber: cfe-commits.

The warning for a format string not being a sting literal and therefore
being potentially insecure is overly strict for indecies into sting
literals. This fix checks if the index into the string literal is
precomputable. If thats the case it will check if the suffix of that
sting literal is a valid format string string literal. It will still
issue the aforementioned warning for out of range indecies into the
string literal.

https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,34 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2); // no-warning
+  printf(s2 + 2); // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2);// no-warning
+  printf(6 + s2 - 2);// no-warning
+  printf(2 + (b ? s1 : s2)); // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), ""); // no-warning
+  printf(2 + (b ? s1 : s2 - 2), ""); // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2); // no-warning
+  printf(0 ? s2 : s2 + 2); // no-warning
+  printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3860,7 +3860,8 @@
   unsigned firstDataArg, Sema::FormatStringType Type,
   Sema::VariadicCallType CallType, bool InFunctionCall,
   llvm::SmallBitVector ,
-  UncoveredArgHandler ) {
+  UncoveredArgHandler ,
+  int64_t ) {
  tryAgain:
   if (E->isTypeDependent() || E->isValueDependent())
 return SLCT_NotALiteral;
@@ -3895,23 +3896,31 @@
 CheckLeft = false;
 }
 
+// We need to maintain the offsets for the right and the left hand side
+// seperately to check if every possible indexed expression is a valid
+// string literal. They might have different offsets for different string
+// literals in the end.
+int64_t LOffset = Offset;
 StringLiteralCheckType Left;
 if (!CheckLeft)
   Left = SLCT_UncheckedLiteral;
 else {
   Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
-   CheckedVarArgs, UncoveredArg);
-  if (Left == SLCT_NotALiteral || !CheckRight)
+   CheckedVarArgs, UncoveredArg, LOffset);
+  if (Left == SLCT_NotALiteral || !CheckRight) {
+Offset = LOffset;
 return Left;
+  }
 }
 
+int64_t ROffset = Offset;
 StringLiteralCheckType Right =
 checkFormatStringExpr(S, C->getFalseExpr(), Args,
   HasVAListArg, format_idx, firstDataArg,
   Type, CallType, InFunctionCall, CheckedVarArgs,
-  UncoveredArg);
+  UncoveredArg, ROffset);
 
 return (CheckLeft && Left < Right) ? Left : Right;
   }
@@ -3964,8 +3973,8 @@
   return checkFormatStringExpr(S, Init, Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
-   /*InFunctionCall*/false, CheckedVarArgs,
-   UncoveredArg);
+   /*InFunctionCall*/ false, CheckedVarArgs,
+   UncoveredArg, Offset);
 }
   }
 
@@ -4020,7 +4029,7 @@
 return checkFormatStringExpr(S, Arg, Args,
  HasVAListArg, format_idx, firstDataArg,