rtrieu created this revision.

String literals used in ranged-based for-loops will actually process the 
terminating null character as part of the loop, which may be unexpected.

  // This runs three times, once for c = 'h', then c = 'i', and finally as c = 
'\0'
  for (char c : "hi") 

This patch adds a warning to -Wrange-loop-analysis when this is used.  Two ways 
to silence the warning are proposed, by either handling the null character in 
the first statement of the loop body or by putting the string literal in 
parentheses.

  // warning
  for (char c : "hi") {}
  
  // silence by handling null character
  for (char c : "hi") {
    if (c == '\0') {}
  }
  
  // silence by parentheses
  for (char c : ("hi")) {}


https://reviews.llvm.org/D72551

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Sema/SemaStmt.cpp
  clang/test/SemaCXX/warn-range-loop-analysis.cpp

Index: clang/test/SemaCXX/warn-range-loop-analysis.cpp
===================================================================
--- clang/test/SemaCXX/warn-range-loop-analysis.cpp
+++ clang/test/SemaCXX/warn-range-loop-analysis.cpp
@@ -297,3 +297,66 @@
   for (int x : I) {}
   // No warning
 }
+
+namespace StringLiteral {
+void test1(int x) {
+  for (int c : "hello world") {}
+  // expected-warning@-1 {{range-based for-loop working over a string literal will process the trailing null character}}
+  // expected-note@-2 {{fix by explicitly handling the null character at the beginning of the loop body with an if statement}}
+  // expected-note@-3 {{place parenthesis around the string literal to silence this warning}}
+  for (char c : "hello world") {}
+  // expected-warning@-1 {{range-based for-loop working over a string literal will process the trailing null character}}
+  // expected-note@-2 {{fix by explicitly handling the null character at the beginning of the loop body with an if statement}}
+  // expected-note@-3 {{place parenthesis around the string literal to silence this warning}}
+
+  // Various methods of silencing the warning.
+  for (char c : ("hello world")) {}
+  for (char c : "hello world") { if (c > 0) {} }
+  for (char c : "hello world") { if (c == '\0') {} }
+  for (char c : "hello world") { if (c != 0) {} }
+  for (char c : "hello world") { if (c) {} }
+  for (char c : "hello world") if (c > 0) {}
+  for (char c : "hello world") if (c == '\0') {}
+  for (char c : "hello world") if (c != 0) {}
+  for (char c : "hello world") if (c) {}
+
+  // Almost silencing the warning.
+  for (char c : "hello world") { if (c >> 0) {} }
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") { if (x > 0) {} }
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") { if (x == '\0') {} }
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") { if (x != 0) {} }
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") { if (x) {} }
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") if (x > 0) {}
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") if (x == '\0') {}
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") if (x != 0) {}
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+  for (char c : "hello world") if (x) {}
+  // expected-warning@-1 {{null character}}
+  // expected-note@-2 {{if statement}}
+  // expected-note@-3 {{silence this warning}}
+}
+
+} // namespace StringLiteral
Index: clang/lib/Sema/SemaStmt.cpp
===================================================================
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -2851,6 +2851,102 @@
   }
 }
 
+// Diagnoses when a string literal is used as the range for a range-based
+// for-loop, because the trailing null will unexpected be handled with the
+// rest of the characters in the string.  The warning can be silenced either
+// by add parentheses around the string literal or by explicitly handling
+// the null character with an if statement at the beginning of the loop body.
+static void DiagnoseForRangeStringLiteral(Sema &SemaRef,
+                                          const CXXForRangeStmt *ForStmt) {
+  if (SemaRef.Diags.isIgnored(diag::warn_for_range_string_literal,
+                              ForStmt->getBeginLoc())) {
+    return;
+  }
+
+  const VarDecl *VD = ForStmt->getLoopVariable();
+  if (!VD || !VD->getType()->isIntegerType())
+    return;
+
+  const Stmt *Init = ForStmt->getRangeInit();
+  if (!Init)
+    return;
+
+  const StringLiteral* Literal = dyn_cast<StringLiteral>(Init);
+  if (!Literal)
+    return;
+
+  const Stmt *FirstStmt = ForStmt->getBody();
+  if (!FirstStmt)
+    return;
+
+  if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(FirstStmt)) {
+    if (!CS->body_empty()) {
+      FirstStmt = CS->body_front();
+    }
+  }
+
+  // Returns true if Statement checks that variable Var is a null character.
+  auto IsNullCharacterCheck = [](const Stmt *Statement, const VarDecl *Var) {
+    const IfStmt *IS = dyn_cast<IfStmt>(Statement);
+    if (!IS)
+      return false;
+    const Expr* Condition = IS->getCond()->IgnoreParenImpCasts();
+
+    // if (c), direct conversion to bool
+    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Condition)) {
+      return DRE->getDecl() == Var;
+    }
+
+    // if (c > 0), comparison against 0 or '\0'
+    const BinaryOperator *BO = dyn_cast<BinaryOperator>(Condition);
+    if (!BO || !BO->isComparisonOp())
+      return false;
+
+    const Expr *LHS = BO->getLHS()->IgnoreParenImpCasts();
+    const Expr *RHS = BO->getRHS()->IgnoreParenImpCasts();
+
+    auto IsNullCharOrZero = [](const Expr *E) {
+      if (const CharacterLiteral *Character = dyn_cast<CharacterLiteral>(E)) {
+        return Character->getValue() == 0;
+      }
+      if (const IntegerLiteral *Integer = dyn_cast<IntegerLiteral>(E)) {
+        return Integer->getValue() == 0;
+      }
+      return false;
+    };
+
+    auto IsVariable = [](const Expr *E, const VarDecl *Var) {
+      if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
+        return DRE->getDecl() == Var;
+      }
+      return false;
+    };
+
+    const bool NullCharOrZero = IsNullCharOrZero(LHS) || IsNullCharOrZero(RHS);
+    const bool Variable = IsVariable(LHS, Var) || IsVariable(RHS, Var);
+
+    if (NullCharOrZero && Variable) return true;
+
+    return false;
+  };
+
+  if (IsNullCharacterCheck(FirstStmt, VD)) return;
+
+  SourceRange Range = Literal->getSourceRange();
+  SemaRef.Diag(Literal->getExprLoc(), diag::warn_for_range_string_literal)
+      << Range;
+
+  SemaRef.Diag(FirstStmt->getBeginLoc(),
+               diag::note_for_range_string_literal_fix);
+
+  SourceLocation Open = Range.getBegin();
+  SourceLocation Close = SemaRef.getLocForEndOfToken(Range.getEnd());
+
+  SemaRef.Diag(Range.getBegin(),
+               diag::note_for_range_string_literal_silence)
+      << Range << FixItHint::CreateInsertion(Open, "(")
+      << FixItHint::CreateInsertion(Close, ")");
+}
 /// FinishCXXForRangeStmt - Attach the body to a C++0x for-range statement.
 /// This is a separate step from ActOnCXXForRangeStmt because analysis of the
 /// body cannot be performed until after the type of the range variable is
@@ -2869,6 +2965,7 @@
                         diag::warn_empty_range_based_for_body);
 
   DiagnoseForRangeVariableCopies(*this, ForStmt);
+  DiagnoseForRangeStringLiteral(*this, ForStmt);
 
   return S;
 }
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -28,6 +28,16 @@
   InGroup<ForLoopAnalysis>, DefaultIgnore;
 def note_loop_iteration_here : Note<"%select{decremented|incremented}0 here">;
 
+def warn_for_range_string_literal : Warning<
+  "range-based for-loop working over a string literal will process the "
+  "trailing null character">,
+  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+def note_for_range_string_literal_fix : Note<
+  "fix by explicitly handling the null character at the beginning of the loop "
+  "body with an if statement">;
+def note_for_range_string_literal_silence : Note<
+  "place parenthesis around the string literal to silence this warning">;
+
 def warn_duplicate_enum_values : Warning<
   "element %0 has been implicitly assigned %1 which another element has "
   "been assigned">, InGroup<DiagGroup<"duplicate-enum">>, DefaultIgnore;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to