Okay, this isn't a patch; I just want to know whether this is the direction you're looking for. The included changes will result in an error message like: tmp.cpp:3:10: error: illegal sequence in character literal char a = '<f1>'; ^ The change doesn't try to make the highlighted ranges look correct for the modified source line. This will cause a problem similar to the one that already exists with multi-byte UTF-8 encodings, where carets and ranges do not appear correct after these sequences. Also, I think it would be better if these <XX> strings could be displayed with a reversed color scheme, just like vi, if the console supports it. Is there any possibility of doing that? Anyway, here are the changes I am contemplating: diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp index d2b8660..464b81b 100644 --- a/lib/Frontend/TextDiagnostic.cpp +++ b/lib/Frontend/TextDiagnostic.cpp @@ -10,6 +10,7 @@ #include "clang/Frontend/TextDiagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/ConvertUTF.h" #include "clang/Frontend/DiagnosticOptions.h" #include "clang/Lex/Lexer.h" #include "llvm/Support/MemoryBuffer.h" @@ -17,6 +18,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/SmallString.h" #include <algorithm> + +#include <sstream> +#include <iomanip> + using namespace clang; static const enum raw_ostream::Colors noteColor = @@ -592,6 +597,36 @@ void TextDiagnostic::emitSnippetAndCaret( // Copy the line of code into an std::string for ease of manipulation. 
std::string SourceLine(LineStart, LineEnd); + + static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 + }; + + unsigned char const *i = reinterpret_cast<unsigned char const *>(LineStart); + unsigned char const *end = reinterpret_cast<unsigned char const *>(LineEnd); + std::string SourceLineMod; + while(i!=end) { + if(isLegalUTF8Sequence(i,end)) { + std::copy(i,i+trailingBytesForUTF8[*i]+1,back_inserter(SourceLineMod)); + i+=trailingBytesForUTF8[*i]+1; + } else { + std::stringstream ss; + ss.fill('0'); + ss << std::hex << '<' << std::setw(2) << (unsigned int)*i << '>'; + SourceLineMod.append(ss.str()); + ++i; + } + } + + SourceLine = SourceLineMod; + // Create a line for the caret that is filled with spaces that is the same // length as the line of source code. std::string CaretLine(LineEnd-LineStart, ' '); On Jan 19, 2012, at 10:36 PM, Seth Cantrell wrote:
|
_______________________________________________ cfe-commits mailing list cfe-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits