bruno updated this revision to Diff 51859.
bruno added a comment.

Update after Richard's review.

- Handle scanf
- Properly update `ConversionSpecifier`


http://reviews.llvm.org/D18296

Files:
  include/clang/Analysis/Analyses/FormatString.h
  lib/Analysis/FormatString.cpp
  lib/Analysis/FormatStringParsing.h
  lib/Analysis/PrintfFormatString.cpp
  lib/Analysis/ScanfFormatString.cpp
  lib/Sema/SemaChecking.cpp
  test/SemaObjC/format-strings-objc.m

Index: test/SemaObjC/format-strings-objc.m
===================================================================
--- test/SemaObjC/format-strings-objc.m
+++ test/SemaObjC/format-strings-objc.m
@@ -265,3 +265,16 @@
   NSLog(@"%2$[tt]@ %1$[tt]s", @"Foo", @"Bar"); // expected-warning {{object format flags cannot be used with 's' conversion specifier}}
 }
 
+// Test Objective-C invalid no printable specifiers
+void testObjcInvalidNoPrintable(int *a) {
+  NSLog(@"%\u25B9", 3); // expected-warning {{invalid conversion specifier '\u25b9'}}
+  NSLog(@"%\xE2\x96\xB9", 3); // expected-warning {{invalid conversion specifier '\u25b9'}}
+  NSLog(@"%\U00010348", 42); // expected-warning {{invalid conversion specifier '\U00010348'}}
+  NSLog(@"%\xF0\x90\x8D\x88", 42); // expected-warning {{invalid conversion specifier '\U00010348'}}
+  NSLog(@"%\xe2", @"Foo"); // expected-warning {{input conversion stopped}} expected-warning {{invalid conversion specifier '\xe2'}}
+  scanf("%\u25B9", a); // expected-warning {{implicitly declaring library}} expected-note {{include the header}} expected-warning {{invalid conversion specifier '\u25b9'}}
+  scanf("%\xE2\x96\xB9", a); // expected-warning {{invalid conversion specifier '\u25b9'}}
+  scanf("%\U00010348", a); // expected-warning {{invalid conversion specifier '\U00010348'}}
+  scanf("%\xF0\x90\x8D\x88", a); // expected-warning {{invalid conversion specifier '\U00010348'}}
+  scanf("%\xe2", a); // expected-warning {{invalid conversion specifier '\xe2'}}
+}
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -36,6 +36,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Locale.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/raw_ostream.h"
 #include <limits>
@@ -3976,12 +3978,41 @@
     // gibberish when trying to match arguments.
     keepGoing = false;
   }
-  
-  EmitFormatDiagnostic(S.PDiag(diag::warn_format_invalid_conversion)
-                         << StringRef(csStart, csLen),
-                       Loc, /*IsStringLocation*/true,
-                       getSpecifierRange(startSpec, specifierLen));
-  
+
+  StringRef Specifier(csStart, csLen);
+
+  // If the specifier in non-printable, it could be the first byte of a UTF-8
+  // sequence. In that case, print the UTF-8 code point. If not, print the byte
+  // hex value.
+  std::string CodePointStr;
+  if (!llvm::sys::locale::isPrint(*csStart)) {
+    UTF32 CodePoint;
+    const UTF8 **B = reinterpret_cast<const UTF8 **>(&csStart);
+    const UTF8 *E =
+        reinterpret_cast<const UTF8 *>(csStart + csLen);
+    ConversionResult Result =
+        llvm::convertUTF8Sequence(B, E, &CodePoint, strictConversion);
+
+    if (Result != conversionOK) {
+      unsigned char FirstChar = *csStart;
+      CodePoint = (UTF32)FirstChar;
+    }
+
+    llvm::raw_string_ostream OS(CodePointStr);
+    if (CodePoint < 256)
+      OS << "\\x" << llvm::format("%02x", CodePoint);
+    else if (CodePoint <= 0xFFFF)
+      OS << "\\u" << llvm::format("%04x", CodePoint);
+    else
+      OS << "\\U" << llvm::format("%08x", CodePoint);
+    OS.flush();
+    Specifier = CodePointStr;
+  }
+
+  EmitFormatDiagnostic(
+      S.PDiag(diag::warn_format_invalid_conversion) << Specifier, Loc,
+      /*IsStringLocation*/ true, getSpecifierRange(startSpec, specifierLen));
+
   return keepGoing;
 }
 
Index: lib/Analysis/ScanfFormatString.cpp
===================================================================
--- lib/Analysis/ScanfFormatString.cpp
+++ lib/Analysis/ScanfFormatString.cpp
@@ -79,7 +79,7 @@
                                                 unsigned &argIndex,
                                                 const LangOptions &LO,
                                                 const TargetInfo &Target) {
-  
+  using namespace clang::analyze_format_string;
   using namespace clang::analyze_scanf;
   const char *I = Beg;
   const char *Start = nullptr;
@@ -210,10 +210,15 @@
   
   // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
   // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
-  
+
   if (k == ScanfConversionSpecifier::InvalidSpecifier) {
+    unsigned Len = I - Beg;
+    if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
+      CS.setEndScanList(Beg + Len);
+      FS.setConversionSpecifier(CS);
+    }
     // Assume the conversion takes one argument.
-    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
+    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
   }
   return ScanfSpecifierResult(Start, FS);
 }
Index: lib/Analysis/PrintfFormatString.cpp
===================================================================
--- lib/Analysis/PrintfFormatString.cpp
+++ lib/Analysis/PrintfFormatString.cpp
@@ -312,8 +312,13 @@
     argIndex++;
 
   if (k == ConversionSpecifier::InvalidSpecifier) {
+    unsigned Len = I - Start;
+    if (ParseUTF8InvalidSpecifier(Start, E, Len)) {
+      CS.setEndScanList(Start + Len);
+      FS.setConversionSpecifier(CS);
+    }
     // Assume the conversion takes one argument.
-    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
+    return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, Len);
   }
   return PrintfSpecifierResult(Start, FS);
 }
Index: lib/Analysis/FormatStringParsing.h
===================================================================
--- lib/Analysis/FormatStringParsing.h
+++ lib/Analysis/FormatStringParsing.h
@@ -46,7 +46,13 @@
 /// FormatSpecifier& argument, and false otherwise.
 bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E,
                          const LangOptions &LO, bool IsScanf = false);
-  
+
+/// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8
+/// string; check that it won't go further than \p FmtStrEnd and write
+/// up the total size in \p Len.
+bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin,
+                               const char *FmtStrEnd, unsigned &Len);
+
 template <typename T> class SpecifierResult {
   T FS;
   const char *Start;
Index: lib/Analysis/FormatString.cpp
===================================================================
--- lib/Analysis/FormatString.cpp
+++ lib/Analysis/FormatString.cpp
@@ -15,6 +15,8 @@
 #include "FormatStringParsing.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Locale.h"
 
 using clang::analyze_format_string::ArgType;
 using clang::analyze_format_string::FormatStringHandler;
@@ -260,6 +262,26 @@
   return true;
 }
 
+bool clang::analyze_format_string::ParseUTF8InvalidSpecifier(
+    const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) {
+  if (SpecifierBegin + 1 >= FmtStrEnd)
+    return false;
+
+  const UTF8 *SB = reinterpret_cast<const UTF8 *>(SpecifierBegin + 1);
+  const UTF8 *SE = reinterpret_cast<const UTF8 *>(FmtStrEnd);
+  const char FirstByte = *SB;
+
+  // If the specifier is non-printable, it could be the first byte of a
+  // UTF-8 sequence. If that's the case, adjust the length accordingly.
+  if (llvm::sys::locale::isPrint(FirstByte))
+    return false;
+  if (!isLegalUTF8String(&SB, SE))
+    return false;
+
+  Len = getNumBytesForUTF8(FirstByte) + 1;
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Methods on ArgType.
 //===----------------------------------------------------------------------===//
Index: include/clang/Analysis/Analyses/FormatString.h
===================================================================
--- include/clang/Analysis/Analyses/FormatString.h
+++ include/clang/Analysis/Analyses/FormatString.h
@@ -210,6 +210,7 @@
   unsigned getLength() const {
     return EndScanList ? EndScanList - Position : 1;
   }
+  void setEndScanList(const char *pos) { EndScanList = pos; }
 
   bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
     kind == FreeBSDrArg || kind == FreeBSDyArg; }
@@ -413,11 +414,6 @@
   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
                                     kind <= DoubleArgEnd; }
-  unsigned getLength() const {
-      // Conversion specifiers currently only are represented by
-      // single characters, but we be flexible.
-    return 1;
-  }
 
   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
     return CS->isPrintfKind();
@@ -546,8 +542,6 @@
   ScanfConversionSpecifier(const char *pos, Kind k)
     : ConversionSpecifier(false, pos, k) {}
 
-  void setEndScanList(const char *pos) { EndScanList = pos; }
-
   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
     return !CS->isPrintfKind();
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to