Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Stephen Hines via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL281527: Do not warn about format strings that are indexed 
string literals. (authored by srhines).

Changed prior to commit:
  https://reviews.llvm.org/D23820?vs=71286=71416#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D23820

Files:
  cfe/trunk/lib/Sema/SemaChecking.cpp
  cfe/trunk/test/Sema/format-strings.c

Index: cfe/trunk/test/Sema/format-strings.c
===
--- cfe/trunk/test/Sema/format-strings.c
+++ cfe/trunk/test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: cfe/trunk/lib/Sema/SemaChecking.cpp
===
--- cfe/trunk/lib/Sema/SemaChecking.cpp
+++ cfe/trunk/lib/Sema/SemaChecking.cpp
@@ -3841,7 +3841,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return 

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Stephen Hines via cfe-commits
srhines added a comment.

I will take care of submitting this. Thanks for the reviews everybody!


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb marked an inline comment as done.
meikeb added a comment.

Thanks for reviewing my patch! It would be great if someone could submit this 
patch for me.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb marked an inline comment as done.


Comment at: lib/Sema/SemaChecking.cpp:3864-3867
@@ +3863,6 @@
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }

This is way better. Thank you.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb marked 2 inline comments as done.
meikeb added a comment.

https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71284.
meikeb added a comment.

Try to improve offset sum helper function name and fix style issues.


https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  else
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation 

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Richard Smith via cfe-commits
rsmith added inline comments.


Comment at: lib/Sema/SemaChecking.cpp:3864-3867
@@ +3863,6 @@
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  else
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");

The suggestion was to remove the condition in the `else if` and put the 
assertion inside its body, rather than duplicating it here:

  if (BinOpKind == BO_Add)
// handle add
  else {
assert(it's a subtract);
// handle sub
  }


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Richard Smith via cfe-commits
rsmith accepted this revision.
rsmith added a comment.
This revision is now accepted and ready to land.

This basically looks fine to me now. I'm not 100% sold on 
`sumUpStringLiteralOffset` being the best name, but I think we have better 
things to worry about, and it's good enough. Just a couple of minor style 
issues then I think this is good to go.



Comment at: lib/Sema/SemaChecking.cpp:3866-3867
@@ +3865,4 @@
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  else
+assert(false && "operator must be add or sub with addend on the right");
+

Rather than `assert(false && XXX);`, use either `llvm_unreachable(XXX)` or 
change the previous case to be:

  else {
assert(AddendIsRight && BinOpKind == BO_Sub &&
   "operator must be ...");


Comment at: lib/Sema/SemaChecking.cpp:4150-4151
@@ +4149,4 @@
+  }
+  FormatStringLiteral FStr =
+  FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue());
+  CheckFormatString(S, , E, Args, HasVAListArg, format_idx,

You can write this more simply as

  FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue());


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb added a comment.

I explained why I chose the names that you commented on. Feel free to add your 
thoughts if you still think another name would be more fitting.



Comment at: lib/Sema/SemaChecking.cpp:3842
@@ -3841,2 +3841,3 @@
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumUpStringLiteralOffset(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,

srhines wrote:
> Is "computeStringLiteralOffset" or "calculate..." a better name here?
I thought about that but decided to go with sumUp because compute or calculate 
sounds like this function would do what we actually do what the caller of this 
function does (computing the offset). This is just a nice helper to sum up the 
offset we already have with another piece of offset.


Comment at: lib/Sema/SemaChecking.cpp:3844
@@ +3843,3 @@
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();

srhines wrote:
> Is "Operand" better than "Addend"? In particular, there is the possibility 
> that we do subtraction of the value instead of addition, so "Addend" makes it 
> a bit confusing. Of course, I then would expect "OperandIsRight" instead of 
> "AddendIsRight" too.
Clang summarizes sub and add as "additive" operands. This is why I think this 
is fitting. Operand is misleading because it includes a lot more operands than 
add and sub imo.


Comment at: lib/Sema/SemaChecking.cpp:3860
@@ +3859,3 @@
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;

srhines wrote:
> Ov -> Overflow
I named that in compliance with clang naming. E.g. sadd_ov. It is common in 
this file to abbreviate variable names with 1-3 characters.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71199.
meikeb marked 7 inline comments as done.
meikeb added a comment.

Fix typos and add assert to sum up offset helper.


https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,94 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumUpStringLiteralOffset(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  else
+assert(false && "operator must be add or sub with addend on the right");
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumUpStringLiteralOffset(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-13 Thread Stephen Hines via cfe-commits
srhines added a comment.

My comment is mostly naming considerations to improve clarity. I do have 
concerns though about the unhandled else path.



Comment at: lib/Sema/SemaChecking.cpp:3842
@@ -3841,2 +3841,3 @@
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void sumUpStringLiteralOffset(llvm::APSInt , llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,

Is "computeStringLiteralOffset" or "calculate..." a better name here?


Comment at: lib/Sema/SemaChecking.cpp:3844
@@ +3843,3 @@
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();

Is "Operand" better than "Addend"? In particular, there is the possibility that 
we do subtraction of the value instead of addition, so "Addend" makes it a bit 
confusing. Of course, I then would expect "OperandIsRight" instead of 
"AddendIsRight" too.


Comment at: lib/Sema/SemaChecking.cpp:3852
@@ +3851,3 @@
+  }
+  // Align the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {

Align -> Canonicalize or Adjust

This is confusing here as "Align" already has too many overloaded meanings in 
programming (that could be relevant to bitwidths). Canonicalize or Adjust don't 
have this problem.


Comment at: lib/Sema/SemaChecking.cpp:3860
@@ +3859,3 @@
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;

Ov -> Overflow


Comment at: lib/Sema/SemaChecking.cpp:3865
@@ +3864,3 @@
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+

What happens if someone passes something that isn't caught by these two cases? 
Should we be returning an indicator that the calculation failed? If not, should 
we assert here?


Comment at: lib/Sema/SemaChecking.cpp:3867
@@ +3866,3 @@
+
+  // We add a offset to a pointer here so we should support a offset as big as
+  // possible.

2 places to fix: "a offset" -> "an offset"


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Meike Baumgärtner via cfe-commits
meikeb marked 6 inline comments as done.
meikeb added a comment.

Thanks for taking the time and doing these great reviews! Really appreciated!



Comment at: lib/Sema/SemaChecking.cpp:4143-4150
@@ -4049,3 +4142,10 @@
 if (StrE) {
-  CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
+  if (Offset.isNegative() || Offset > StrE->getLength()) {
+// TODO: It would be better to have an explicit warning for out of
+// bounds literals.
+return SLCT_NotALiteral;
+  }
+  FormatStringLiteral FStr =
+  FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue());
+  CheckFormatString(S, , E, Args, HasVAListArg, format_idx,
 firstDataArg, Type, InFunctionCall, CallType,

The = case is part of a different warning. It's checked in CheckFormatString.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Richard Smith via cfe-commits
rsmith added inline comments.


Comment at: lib/Sema/SemaChecking.cpp:3842-3843
@@ -3841,2 +3841,4 @@
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void reckonUpOffset(llvm::APSInt , llvm::APSInt Addend,
+   BinaryOperatorKind BinOpKind, bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();

I can't tell from this declaration what this function is for -- what does 
"reckon up" mean?


Comment at: lib/Sema/SemaChecking.cpp:3880
@@ +3879,3 @@
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when retruning
+// the string and its length or the source locations to display notes 
correctly.

Typo "retruning"


Comment at: lib/Sema/SemaChecking.cpp:3891-3892
@@ +3890,4 @@
+  StringRef getString() const {
+return StringRef(FExpr->getString().data() + Offset,
+ FExpr->getLength() - Offset);
+  }

I think you can simplify this to `return FExpr->getString().drop_front(Offset);`


Comment at: lib/Sema/SemaChecking.cpp:4143
@@ -4049,2 +4142,3 @@
 if (StrE) {
-  CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
+  if (Offset.isNegative()) {
+// TODO: It would be better to have an explicit warning for out of

You should presumably also do this if `Offset` is >= the length of the string 
literal (we want `printf` and friends to at least find the trailing nul byte).


Comment at: lib/Sema/SemaChecking.cpp:4148-4149
@@ +4147,4 @@
+  }
+  FormatStringLiteral *FStr =
+  new FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue());
+  CheckFormatString(S, FStr, E, Args, HasVAListArg, format_idx,

Does this need to be heap-allocated?


Comment at: lib/Sema/SemaChecking.cpp:4166-4167
@@ +4165,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = false;
+  bool RIsInt = false;
+  // Prevent asserts triggering in EvaluateAsInt by checking if we deal 
with

meikeb wrote:
> I hope this additional check fixes this issue. As far as I read the code 
> there were none such asserts in isIntegerConstantExpr(). Thanks for 
> explaining this!
OK, I've now checked and the value-dependent case is handled up on line 3953. 
So just calling `EvaluateAsInt` here is fine after all.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Meike Baumgärtner via cfe-commits
meikeb marked 4 inline comments as done.


Comment at: lib/Sema/SemaChecking.cpp:4166-4167
@@ +4165,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = false;
+  bool RIsInt = false;
+  // Prevent asserts triggering in EvaluateAsInt by checking if we deal 
with

I hope this additional check fixes this issue. As far as I read the code there 
were none such asserts in isIntegerConstantExpr(). Thanks for explaining this!


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-12 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71043.
meikeb added a comment.

Fix the mentioned issues.


https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,92 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema , const StringLiteral *FExpr,
+static void reckonUpOffset(llvm::APSInt , llvm::APSInt Addend,
+   BinaryOperatorKind BinOpKind, bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Align the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else if (AddendIsRight && BinOpKind == BO_Sub)
+ResOffset = Offset.ssub_ov(Addend, Ov);
+
+  // We add a offset to a pointer here so we should support a offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+reckonUpOffset(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when retruning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return StringRef(FExpr->getString().data() + Offset,
+ FExpr->getLength() - Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager , const LangOptions ,
+  const TargetInfo , unsigned *StartToken = nullptr,
+  

Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-09-07 Thread Richard Smith via cfe-commits
rsmith added inline comments.


Comment at: lib/Sema/SemaChecking.cpp:4089-4090
@@ +4088,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
+  bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
+

meikeb wrote:
> rsmith wrote:
> > What happens if one of these expressions is value-dependent? The evaluator 
> > can crash or assert if given a value-dependent expression. If we don't 
> > defer these checks in dependent contexts, you'll need to handle that 
> > possibility somehow.
> > 
> > Example:
> > 
> >   template void f(const char *p) {
> > printf("blah blah %s" + N, p);
> >   }
> I think I don't understand what you are trying to tell me. Especially the 
> example you provided does just fine and behaves as I expected. As far as I 
> followed EvaluateAsInt it does not assert but returns false if we don't get a 
> constexpr here. We warn under -Wformat-nonliteral for value-dependent string 
> literals.
> 
> Could you explain this more or provide an example that triggers an assert or 
> explain what behavior is wrong regarding the provided example? Thanks! 
We should not warn for that example, since (for instance) calling `f<0>` is 
fine (we should warn for `f<11>`, though, since it has no format specifiers).

While `EvaluateAsInt` happens to not assert for that particular value-dependent 
input, it does assert for some other value-dependent cases. It's not easy for 
me to find you such a case, because Clang is currently careful to never call 
this function on a value-dependent expression, but perhaps this will trigger an 
assert:

  struct S { constexpr S(int n) : n(n) {} int n; };
  template void f(const char *p) {
printf("blah blah %s" + S(N).n, p);
  }


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-26 Thread Meike Baumgärtner via cfe-commits
meikeb added inline comments.


Comment at: lib/Sema/SemaChecking.cpp:4089-4090
@@ +4088,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
+  bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
+

rsmith wrote:
> What happens if one of these expressions is value-dependent? The evaluator 
> can crash or assert if given a value-dependent expression. If we don't defer 
> these checks in dependent contexts, you'll need to handle that possibility 
> somehow.
> 
> Example:
> 
>   template void f(const char *p) {
> printf("blah blah %s" + N, p);
>   }
I think I don't understand what you are trying to tell me. Especially the 
example you provided does just fine and behaves as I expected. As far as I 
followed EvaluateAsInt it does not assert but returns false if we don't get a 
constexpr here. We warn under -Wformat-nonliteral for value-dependent string 
literals.

Could you explain this more or provide an example that triggers an assert or 
explain what behavior is wrong regarding the provided example? Thanks! 


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-23 Thread Richard Smith via cfe-commits
rsmith added a comment.

Please also handle expressions of the form `&"str"[i]`. We warn (under 
`-Wstring-plus-int`) for `"str" + i` and suggest rewriting into that form, so 
we should also handle that form here.



Comment at: lib/Sema/SemaChecking.cpp:3864
@@ -3864,1 +3863,3 @@
+  UncoveredArgHandler ,
+  int64_t ) {
  tryAgain:

Why is this passed by reference? It's just an input, not an output, right?


Comment at: lib/Sema/SemaChecking.cpp:4062-4067
@@ +4061,8 @@
+// into the original string literal.
+StrE = StringLiteral::Create(S.Context,
+ StrE->getString().drop_front(Offset),
+ StrE->getKind(),
+ StrE->isPascal(),
+ StrE->getType(),
+ StrE->getLocStart());
+  } else if (Offset < 0) {

This doesn't seem like it preserves enough information for the downstream code 
to give correct caret diagnostics pointing at locations within the string. It 
seems like it would be extremely complicated to maintain the necessary 
invariants to make that work (you'd need to create a fake string literal source 
buffer so that the `StringLiteralParser` can reparse it, for whichever of the 
string literal tokens the offset ends up within). Have you looked at how much 
work it'd be to feed a starting offset into `CheckFormatString` instead?


Comment at: lib/Sema/SemaChecking.cpp:4089-4090
@@ +4088,4 @@
+if (BinOp->isAdditiveOp()) {
+  bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
+  bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
+

What happens if one of these expressions is value-dependent? The evaluator can 
crash or assert if given a value-dependent expression. If we don't defer these 
checks in dependent contexts, you'll need to handle that possibility somehow.

Example:

  template void f(const char *p) {
printf("blah blah %s" + N, p);
  }


Comment at: lib/Sema/SemaChecking.cpp:4097-4100
@@ +4096,6 @@
+  if (LIsInt) {
+Offset += LResult.getExtValue();
+E = BinOp->getRHS();
+  } else {
+Offset += RResult.getExtValue();
+E = BinOp->getLHS();

This will assert if the result doesn't fit into 64 bits, and it's not 
guaranteed to (if one of the operands was cast to `__int128`, for instance). 
You could use `getLimitedValue` instead, with some suitable limit.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-23 Thread Meike Baumgärtner via cfe-commits
meikeb marked an inline comment as done.


Comment at: lib/Sema/SemaChecking.cpp:3856
@@ -3855,3 +3855,3 @@
 // format string, we will usually need to emit a warning.
 // True string literals are then checked by CheckFormatString.
 static StringLiteralCheckType

I'm not sure if that should be mentioned here because it is a very high level 
comment and the suffix of a string literal is a string literal itself.


https://reviews.llvm.org/D23820



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D23820: Do not warn about format strings that are indexed string literals.

2016-08-23 Thread Meike Baumgärtner via cfe-commits
meikeb created this revision.
meikeb added a reviewer: rsmith.
meikeb added a subscriber: cfe-commits.

The warning for a format string not being a sting literal and therefore
being potentially insecure is overly strict for indecies into sting
literals. This fix checks if the index into the string literal is
precomputable. If thats the case it will check if the suffix of that
sting literal is a valid format string string literal. It will still
issue the aforementioned warning for out of range indecies into the
string literal.

https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,34 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2); // no-warning
+  printf(s2 + 2); // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2);// no-warning
+  printf(6 + s2 - 2);// no-warning
+  printf(2 + (b ? s1 : s2)); // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), ""); // no-warning
+  printf(2 + (b ? s1 : s2 - 2), ""); // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2); // no-warning
+  printf(0 ? s2 : s2 + 2); // no-warning
+  printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3860,7 +3860,8 @@
   unsigned firstDataArg, Sema::FormatStringType Type,
   Sema::VariadicCallType CallType, bool InFunctionCall,
   llvm::SmallBitVector ,
-  UncoveredArgHandler ) {
+  UncoveredArgHandler ,
+  int64_t ) {
  tryAgain:
   if (E->isTypeDependent() || E->isValueDependent())
 return SLCT_NotALiteral;
@@ -3895,23 +3896,31 @@
 CheckLeft = false;
 }
 
+// We need to maintain the offsets for the right and the left hand side
+// seperately to check if every possible indexed expression is a valid
+// string literal. They might have different offsets for different string
+// literals in the end.
+int64_t LOffset = Offset;
 StringLiteralCheckType Left;
 if (!CheckLeft)
   Left = SLCT_UncheckedLiteral;
 else {
   Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
HasVAListArg, format_idx, firstDataArg,
Type, CallType, InFunctionCall,
-   CheckedVarArgs, UncoveredArg);
-  if (Left == SLCT_NotALiteral || !CheckRight)
+   CheckedVarArgs, UncoveredArg, LOffset);
+  if (Left == SLCT_NotALiteral || !CheckRight) {
+Offset = LOffset;
 return Left;
+  }
 }
 
+int64_t ROffset = Offset;
 StringLiteralCheckType Right =
 checkFormatStringExpr(S, C->getFalseExpr(), Args,
   HasVAListArg, format_idx, firstDataArg,
   Type, CallType, InFunctionCall, CheckedVarArgs,
-  UncoveredArg);
+  UncoveredArg, ROffset);
 
 return (CheckLeft && Left < Right) ? Left : Right;
   }
@@ -3964,8 +3973,8 @@
   return checkFormatStringExpr(S, Init, Args,
HasVAListArg, format_idx,
firstDataArg, Type, CallType,
-   /*InFunctionCall*/false, CheckedVarArgs,
-   UncoveredArg);
+   /*InFunctionCall*/ false, CheckedVarArgs,
+   UncoveredArg, Offset);
 }
   }
 
@@ -4020,7 +4029,7 @@
 return checkFormatStringExpr(S, Arg, Args,
  HasVAListArg, format_idx, firstDataArg,