On Sun, Oct 28, 2012 at 11:24 AM, Seth Cantrell <[email protected]> wrote: > Author: socantre > Date: Sun Oct 28 13:24:46 2012 > New Revision: 166900 > > URL: http://llvm.org/viewvc/llvm-project?rev=166900&view=rev > Log: > improve highlighting of invalid string encodings > > limit highlight to exactly the bad encoding, and highlight every > bad encoding in a string. > > Modified: > cfe/trunk/lib/Lex/LiteralSupport.cpp > cfe/trunk/test/Misc/wrong-encoding.c > > Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=166900&r1=166899&r2=166900&view=diff > ============================================================================== > --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original) > +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Sun Oct 28 13:24:46 2012 > @@ -49,6 +49,20 @@ > } > } > > +static CharSourceRange MakeCharSourceRange(const LangOptions &Features, > + FullSourceLoc TokLoc, > + const char *TokBegin, > + const char *TokRangeBegin, > + const char *TokRangeEnd) { > + SourceLocation Begin = > + Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin, > + TokLoc.getManager(), Features); > + SourceLocation End = > + Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin, > + TokLoc.getManager(), Features); > + return CharSourceRange::getCharRange(Begin, End); > +} > + > /// \brief Produce a diagnostic highlighting some portion of a literal. 
> /// > /// Emits the diagnostic \p DiagID, highlighting the range of characters from > @@ -61,11 +75,8 @@ > SourceLocation Begin = > Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin, > TokLoc.getManager(), Features); > - SourceLocation End = > - Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin, > - TokLoc.getManager(), Features); > - return Diags->Report(Begin, DiagID) > - << CharSourceRange::getCharRange(Begin, End); > + return Diags->Report(Begin, DiagID) << > + MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, > TokRangeEnd); > } > > /// ProcessCharEscape - Parse a standard C escape sequence, which can occur > in > @@ -1372,6 +1383,15 @@ > } > } > > +static const char *resync_utf8(const char *err, const char *end) { > + if (err==end) > + return end; > + end = err + std::min<unsigned>(getNumBytesForUTF8(*err), end-err); > + while (++err!=end && (*err&0xC0)==0x80) > + ; > + return err; > +} > + > /// \brief This function copies from Fragment, which is a sequence of bytes > /// within Tok's contents (which begin at TokBegin) into ResultPtr. > /// Performs widening for multi-byte characters. > @@ -1381,7 +1401,6 @@ > const UTF8 *ErrorPtrTmp; > if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp)) > return false; > - const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp); > > // If we see bad encoding for unprefixed string literals, warn and > // simply copy the byte values, for compatibility with gcc and older > @@ -1391,12 +1410,31 @@ > memcpy(ResultPtr, Fragment.data(), Fragment.size()); > ResultPtr += Fragment.size(); > } > + > if (Diags) { > - Diag(Diags, Features, FullSourceLoc(Tok.getLocation(), SM), TokBegin, > - ErrorPtr, ErrorPtr + > std::min<unsigned>(getNumBytesForUTF8(*ErrorPtr), > - Fragment.end() - ErrorPtr), > - NoErrorOnBadEncoding ? 
diag::warn_bad_string_encoding > - : diag::err_bad_string_encoding); > + const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp); > + > + FullSourceLoc SourceLoc(Tok.getLocation(), SM); > + const DiagnosticBuilder &Builder = > + Diag(Diags, Features, SourceLoc, TokBegin, > + ErrorPtr, resync_utf8(ErrorPtr, Fragment.end()), > + NoErrorOnBadEncoding ? diag::warn_bad_string_encoding > + : diag::err_bad_string_encoding); > + > + char *SavedResultPtr = ResultPtr; > + const char *NextStart = resync_utf8(ErrorPtr, Fragment.end()); > + StringRef NextFragment(NextStart, Fragment.end()-NextStart); > + > + while (!ConvertUTF8toWide(CharByteWidth, NextFragment, ResultPtr, > + ErrorPtrTmp)) { > + const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp); > + NextStart = resync_utf8(ErrorPtr, Fragment.end()); > + Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin, > + ErrorPtr, NextStart);
This fails when you add more ranges than the DiagnosticBuilder's array of SourceRanges can hold, i.e. when a string contains many bad encodings. I fixed this in r167059 by limiting the number of ranges we add to the diagnostic. Feel free to try another solution if that one isn't suitable in some way. > + NextFragment = StringRef(NextStart, Fragment.end()-NextStart); > + } > + > + ResultPtr = SavedResultPtr; > } > return !NoErrorOnBadEncoding; > } > > Modified: cfe/trunk/test/Misc/wrong-encoding.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/wrong-encoding.c?rev=166900&r1=166899&r2=166900&view=diff > ============================================================================== > --- cfe/trunk/test/Misc/wrong-encoding.c (original) > +++ cfe/trunk/test/Misc/wrong-encoding.c Sun Oct 28 13:24:46 2012 > @@ -1,16 +1,33 @@ > -// RUN: %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck -strict-whitespace %s > +// RUN: %clang_cc1 -fsyntax-only -Wno-unused-value %s 2>&1 | FileCheck > -strict-whitespace %s > > void foo() { > > "§Ã"; // ø > // CHECK: {{^ "<A7><C3>"; // <F8>}} > -// CHECK: {{^ \^~~~}} > +// CHECK: {{^ \^~~~~~~}} > > /* þ« */ const char *d = "¥"; > > // CHECK: {{^ /\* <FE><AB> \*/ const char \*d = "<A5>";}} > // CHECK: {{^ \^~~~}} > > -// CHECK: {{^ "<A7><C3>"; // <F8>}} > -// CHECK: {{^ \^~~~~~~~~~}} > + "xxé¿¿¿d"; > +// CHECK: {{^ "xx<U\+9FFF><BF>d";}} > +// CHECK: {{^ \^~~~}} > + > + "xxé¿bcd"; > +// CHECK: {{^ "xx<E9><BF>bcd";}} > +// CHECK: {{^ \^~~~~~~~}} > + > + "xxéabcd"; > +// CHECK: {{^ "xx<E9>abcd";}} > +// CHECK: {{^ \^~~~}} > + > + "xxé¿é¿d"; > +// CHECK: {{^ "xx<E9><BF><E9><BF>d";}} > +// CHECK: {{^ \^~~~~~~~~~~~~~~}} > + > + "xxé¿xxxxxxxxxxxxxxxxxxxxxé¿xx"; > +// CHECK: {{^ "xx<E9><BF>xxxxxxxxxxxxxxxxxxxxx<E9><BF>xx";}} > +// CHECK: {{^ \^~~~~~~~ ~~~~~~~~}} > } > > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list [email 
protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
