On Thu, Mar 8, 2012 at 1:59 PM, Richard Smith <[email protected]> wrote: > Author: rsmith > Date: Thu Mar 8 15:59:28 2012 > New Revision: 152344 > > URL: http://llvm.org/viewvc/llvm-project?rev=152344&view=rev > Log: > When checking the encoding of an 8-bit string literal, don't just check the > first codepoint! Also, don't reject empty raw string literals for spurious > "encoding" issues. Also, don't rely on undefined behavior in ConvertUTF.c. > > Modified: > cfe/trunk/include/clang/Basic/ConvertUTF.h > cfe/trunk/lib/Basic/ConvertUTF.c > cfe/trunk/lib/Lex/LiteralSupport.cpp > cfe/trunk/test/Lexer/cxx0x_raw_string_delim_length.cpp > cfe/trunk/test/Lexer/string-literal-encoding.c > > Modified: cfe/trunk/include/clang/Basic/ConvertUTF.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/ConvertUTF.h?rev=152344&r1=152343&r2=152344&view=diff > ============================================================================== > --- cfe/trunk/include/clang/Basic/ConvertUTF.h (original) > +++ cfe/trunk/include/clang/Basic/ConvertUTF.h Thu Mar 8 15:59:28 2012 > @@ -151,9 +151,11 @@ > ConversionResult ConvertUTF32toUTF16 ( > const UTF32** sourceStart, const UTF32* sourceEnd, > UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); > -#endif > > Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); > +#endif > + > +Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd); > > #ifdef __cplusplus > } > > Modified: cfe/trunk/lib/Basic/ConvertUTF.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/ConvertUTF.c?rev=152344&r1=152343&r2=152344&view=diff > ============================================================================== > --- cfe/trunk/lib/Basic/ConvertUTF.c (original) > +++ cfe/trunk/lib/Basic/ConvertUTF.c Thu Mar 8 15:59:28 2012 > @@ -387,7 +387,7 @@ > */ > Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { > int length = trailingBytesForUTF8[*source]+1; > - if 
(source+length > sourceEnd) { > + if (length > sourceEnd - source) { > return false; > } > return isLegalUTF8(source, length); > @@ -395,6 +395,22 @@ > > /* --------------------------------------------------------------------- */ > > +/* > + * Exported function to return whether a UTF-8 string is legal or not. > + * This is not used here; it's just exported. > + */ > +Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) { > + while (source != sourceEnd) { > + int length = trailingBytesForUTF8[*source] + 1; > + if (length > sourceEnd - source || !isLegalUTF8(source, length)) > + return false; > + source += length; > + } > + return true; > +} > + > +/* --------------------------------------------------------------------- */ > + > ConversionResult ConvertUTF8toUTF16 ( > const UTF8** sourceStart, const UTF8* sourceEnd, > UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { > @@ -404,7 +420,7 @@ > while (source < sourceEnd) { > UTF32 ch = 0; > unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; > - if (source + extraBytesToRead >= sourceEnd) { > + if (extraBytesToRead >= sourceEnd - source) { > result = sourceExhausted; break; > } > /* Do this check whether lenient or strict */ > @@ -477,7 +493,7 @@ > while (source < sourceEnd) { > UTF32 ch = 0; > unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; > - if (source + extraBytesToRead >= sourceEnd) { > + if (extraBytesToRead >= sourceEnd - source) { > result = sourceExhausted; break; > } > /* Do this check whether lenient or strict */ > > Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=152344&r1=152343&r2=152344&view=diff > ============================================================================== > --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original) > +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Thu Mar 8 15:59:28 2012 > @@ -333,7 +333,7 @@ > /// decimal-constant integer-suffix > /// 
octal-constant integer-suffix > /// hexadecimal-constant integer-suffix > -/// user-defiend-integer-literal: [C++11 lex.ext] > +/// user-defined-integer-literal: [C++11 lex.ext] > /// decimal-literal ud-suffix > /// octal-literal ud-suffix > /// hexadecimal-literal ud-suffix > @@ -1167,17 +1167,14 @@ > ++ThisTokBuf; > ++ThisTokBuf; // skip '(' > > - // remove same number of characters from the end > - if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix)) > - ThisTokEnd -= (ThisTokBuf - Prefix); > + // Remove same number of characters from the end > + ThisTokEnd -= ThisTokBuf - Prefix; > + assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal"); > > // Copy the string over > - if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf))) > - { > + if (CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf))) > if (DiagnoseBadString(StringToks[i])) > hadError = true; > - } > - > } else { > assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); > ++ThisTokBuf; // skip " > @@ -1204,11 +1201,9 @@ > } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); > > // Copy the character span over. > - if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart))) > - { > + if (CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart))) > if (DiagnoseBadString(StringToks[i])) > hadError = true; > - } > continue; > } > // Is this a Universal Character Name escape? > @@ -1292,8 +1287,8 @@ > ConversionResult result = conversionOK; > // Copy the character span over. > if (CharByteWidth == 1) { > - if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()), > - reinterpret_cast<const UTF8*>(Fragment.end()))) > + if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()), > + reinterpret_cast<const UTF8*>(Fragment.end())))
Ah, I think that one is my fault... thanks for spotting it.

-Eli
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
