On Fri, Jul 12, 2013 at 5:28 PM, Reid Kleckner <[email protected]> wrote:
> MSBuild writes response files as UTF-16 little endian with a byte order > mark (0xFEFF). With this change, clang will be able to read them, > although we > still can't parse any of their flags. BOMs with non-native endianness are > recognized as 0xFFFE. Byteswapping them is TODO. > > http://llvm-reviews.chandlerc.com/D1137 > > Files: > test/Driver/at_file.c > test/Driver/at_file.c.args.utf16le > tools/driver/driver.cpp > > Index: test/Driver/at_file.c > =================================================================== > --- test/Driver/at_file.c > +++ test/Driver/at_file.c > @@ -1,5 +1,7 @@ > // RUN: %clang -E %s @%s.args -o %t.log > // RUN: FileCheck --input-file=%t.log %s > +// RUN: %clang -E %s @%s.args.utf16le -o %t.log > +// RUN: FileCheck --input-file=%t.log %s > > // CHECK: bar1 > // CHECK-NEXT: bar2 zed2 > Index: tools/driver/driver.cpp > =================================================================== > --- tools/driver/driver.cpp > +++ tools/driver/driver.cpp > @@ -25,9 +25,11 @@ > #include "llvm/ADT/OwningPtr.h" > #include "llvm/ADT/SmallString.h" > #include "llvm/ADT/SmallVector.h" > +#include "llvm/ADT/STLExtras.h" > #include "llvm/Option/ArgList.h" > #include "llvm/Option/OptTable.h" > #include "llvm/Option/Option.h" > +#include "llvm/Support/ConvertUTF.h" > #include "llvm/Support/ErrorHandling.h" > #include "llvm/Support/FileSystem.h" > #include "llvm/Support/Host.h" > @@ -199,16 +201,51 @@ > } > > const char *Buf = MemBuf->getBufferStart(); > + const char *BufEnd = MemBuf->getBufferEnd() + 1; > char InQuote = ' '; > std::string CurArg; > > + UTF16 MaybeBOM = 0; > + memcpy(&MaybeBOM, Buf, 2); > + const UTF16 *Src = 0, *SrcEnd = 0; > + UTF8 UTF8Buf[4096]; > + > + if (MaybeBOM == 0xFFFE) { > + // Byte-swapped endianness UTF16 BOM. We can't issue a diagnostic, so > + // pretend we didn't realize this was a response file. > + ArgVector.push_back(SaveStringInSet(SavedStrings, Arg)); > + return; > + } else if (MaybeBOM == 0xFEFF) { > + // Native endianness UTF16 BOM. Convert a chunk at a time as needed. > + Src = reinterpret_cast<const UTF16 *>(Buf) + 1; > + SrcEnd = reinterpret_cast<const UTF16 *>(BufEnd); > Is BufEnd guaranteed to be aligned on a UTF16* boundary here? > + Buf = 0; > + BufEnd = 0; > + } > + > for (const char *P = Buf; ; ++P) { > + if (P == BufEnd) { > + assert(Src); > + UTF8 *Dst = &UTF8Buf[0]; > + ConversionResult CR = ConvertUTF16toUTF8( > + &Src, SrcEnd, &Dst, llvm::array_endof(UTF8Buf), > strictConversion); > + if (CR == sourceExhausted) > + break; > + if (CR != conversionOK && CR != targetExhausted) > + return; // FIXME: Fail more loudly. > + // Dst was updated to be one past the last translated byte. > + P = reinterpret_cast<char *>(UTF8Buf); > + BufEnd = reinterpret_cast<char *>(Dst); > + } > + > if (*P == '\0' || (isWhitespace(*P) && InQuote == ' ')) { > if (!CurArg.empty()) { > > if (CurArg[0] != '@') { > ArgVector.push_back(SaveStringInSet(SavedStrings, CurArg)); > } else { > + // FIXME: A response file can refer to itself and cause infinite > + // recursion. > ExpandArgsFromBuf(CurArg.c_str(), ArgVector, SavedStrings); > } > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > >
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
