+ assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
+ "Only narrow string literals are currently supported");
If a non-narrow string literal is encountered with assertions disabled, will this just continue and silently corrupt the rest of the compilation? Or will parsing the non-narrow string literal fail gracefully somewhere earlier in the pipeline? On Tue, Jun 12, 2012 at 10:37 PM, Richard Smith <[email protected]> wrote: > Author: rsmith > Date: Wed Jun 13 00:37:23 2012 > New Revision: 158390 > > URL: http://llvm.org/viewvc/llvm-project?rev=158390&view=rev > Log: > PR13099: Teach -Wformat about raw string literals, UTF-8 strings and > Unicode escape sequences. > > Modified: > cfe/trunk/lib/AST/Expr.cpp > cfe/trunk/lib/Lex/LiteralSupport.cpp > cfe/trunk/lib/Sema/SemaChecking.cpp > cfe/trunk/test/SemaCXX/format-strings-0x.cpp > > Modified: cfe/trunk/lib/AST/Expr.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=158390&r1=158389&r2=158390&view=diff > > ============================================================================== > --- cfe/trunk/lib/AST/Expr.cpp (original) > +++ cfe/trunk/lib/AST/Expr.cpp Wed Jun 13 00:37:23 2012 > @@ -679,7 +679,8 @@ > SourceLocation StringLiteral:: > getLocationOfByte(unsigned ByteNo, const SourceManager &SM, > const LangOptions &Features, const TargetInfo &Target) > const { > - assert(Kind == StringLiteral::Ascii && "This only works for ASCII > strings"); > + assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) && > + "Only narrow string literals are currently supported"); > > // Loop over all of the tokens in this string until we find the one that > // contains the byte we're looking for. 
> > Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=158390&r1=158389&r2=158390&view=diff > > ============================================================================== > --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original) > +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Wed Jun 13 00:37:23 2012 > @@ -250,6 +250,39 @@ > return true; > } > > +/// MeasureUCNEscape - Determine the number of bytes within the resulting > string > +/// which this UCN will occupy. > +static int MeasureUCNEscape(const char *ThisTokBegin, const char > *&ThisTokBuf, > + const char *ThisTokEnd, unsigned > CharByteWidth, > + const LangOptions &Features, bool &HadError) { > + // UTF-32: 4 bytes per escape. > + if (CharByteWidth == 4) > + return 4; > + > + uint32_t UcnVal = 0; > + unsigned short UcnLen = 0; > + FullSourceLoc Loc; > + > + if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, > + UcnLen, Loc, 0, Features, true)) { > + HadError = true; > + return 0; > + } > + > + // UTF-16: 2 bytes for BMP, 4 bytes otherwise. > + if (CharByteWidth == 2) > + return UcnVal <= 0xFFFF ? 2 : 4; > + > + // UTF-8. > + if (UcnVal < 0x80) > + return 1; > + if (UcnVal < 0x800) > + return 2; > + if (UcnVal < 0x10000) > + return 3; > + return 4; > +} > + > /// EncodeUCNEscape - Read the Universal Character Name, check > constraints and > /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of > /// StringLiteralParser. When we decide to implement UCN's for > identifiers, > @@ -265,7 +298,7 @@ > unsigned short UcnLen = 0; > if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, > UcnLen, > Loc, Diags, Features, true)) { > - HadError = 1; > + HadError = true; > return; > } > > @@ -1369,14 +1402,31 @@ > if (StringInvalid) > return 0; > > + const char *SpellingStart = SpellingPtr; > + const char *SpellingEnd = SpellingPtr+TokLen; > + > + // Handle UTF-8 strings just like narrow strings. 
> + if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8') > + SpellingPtr += 2; > + > assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && > SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings > yet"); > > + // For raw string literals, this is easy. > + if (SpellingPtr[0] == 'R') { > + assert(SpellingPtr[1] == '"' && "Should be a raw string literal!"); > + // Skip 'R"'. > + SpellingPtr += 2; > + while (*SpellingPtr != '(') { > + ++SpellingPtr; > + assert(SpellingPtr < SpellingEnd && "Missing ( for raw string > literal"); > + } > + // Skip '('. > + ++SpellingPtr; > + return SpellingPtr - SpellingStart + ByteNo; > + } > > - const char *SpellingStart = SpellingPtr; > - const char *SpellingEnd = SpellingPtr+TokLen; > - > - // Skip over the leading quote. > + // Skip over the leading quote > assert(SpellingPtr[0] == '"' && "Should be a string literal!"); > ++SpellingPtr; > > @@ -1393,11 +1443,23 @@ > > // Otherwise, this is an escape character. Advance over it. > bool HadError = false; > - ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, > - FullSourceLoc(Tok.getLocation(), SM), > - CharByteWidth*8, Diags); > + if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') { > + const char *EscapePtr = SpellingPtr; > + unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, > SpellingEnd, > + 1, Features, HadError); > + if (Len > ByteNo) { > + // ByteNo is somewhere within the escape sequence. 
> + SpellingPtr = EscapePtr; > + break; > + } > + ByteNo -= Len; > + } else { > + ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, > + FullSourceLoc(Tok.getLocation(), SM), > + CharByteWidth*8, Diags); > + --ByteNo; > + } > assert(!HadError && "This method isn't valid on erroneous strings"); > - --ByteNo; > } > > return SpellingPtr-SpellingStart; > > Modified: cfe/trunk/lib/Sema/SemaChecking.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=158390&r1=158389&r2=158390&view=diff > > ============================================================================== > --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) > +++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jun 13 00:37:23 2012 > @@ -2633,7 +2633,7 @@ > bool inFunctionCall) { > > // CHECK: is the format string a wide literal? > - if (!FExpr->isAscii()) { > + if (!FExpr->isAscii() && !FExpr->isUTF8()) { > CheckFormatHandler::EmitFormatDiagnostic( > *this, inFunctionCall, Args[format_idx], > PDiag(diag::warn_format_string_is_wide_literal), > FExpr->getLocStart(), > > Modified: cfe/trunk/test/SemaCXX/format-strings-0x.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/format-strings-0x.cpp?rev=158390&r1=158389&r2=158390&view=diff > > ============================================================================== > --- cfe/trunk/test/SemaCXX/format-strings-0x.cpp (original) > +++ cfe/trunk/test/SemaCXX/format-strings-0x.cpp Wed Jun 13 00:37:23 2012 > @@ -12,4 +12,16 @@ > scanf("%afoobar", fp); > printf(nullptr); > printf(*sp); // expected-warning {{not a string literal}} > + > + // PR13099 > + printf( > + R"foobar(%)foobar" > + R"bazquux(d)bazquux" // expected-warning {{more '%' conversions than > data arguments}} > + R"xyzzy()xyzzy"); > + > + printf(u8"this is %d test", 0); // ok > + printf(u8R"foo( > + \u1234\U0010fffe > + %d)foo" // expected-warning {{more '%' conversions than data > arguments}} > + ); > } > > > 
_______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
