llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-support Author: Abhina Sree (abhina-sree) <details> <summary>Changes</summary> --- Patch is 42.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/196568.diff 18 Files Affected: - (modified) clang/include/clang/AST/Expr.h (+6) - (modified) clang/include/clang/AST/FormatString.h (+7-5) - (modified) clang/include/clang/Basic/TargetInfo.h (+3) - (modified) clang/include/clang/Lex/TextEncodingConfig.h (+2-1) - (modified) clang/include/clang/Sema/Sema.h (+1-1) - (modified) clang/lib/AST/Expr.cpp (+14) - (modified) clang/lib/AST/FormatString.cpp (+46-40) - (modified) clang/lib/AST/FormatStringParsing.h (+25-11) - (modified) clang/lib/AST/PrintfFormatString.cpp (+58-31) - (modified) clang/lib/AST/ScanfFormatString.cpp (+15-8) - (modified) clang/lib/Basic/TargetInfo.cpp (+3) - (modified) clang/lib/Frontend/CompilerInstance.cpp (+1-1) - (modified) clang/lib/Lex/TextEncodingConfig.cpp (+10-1) - (modified) clang/lib/Sema/SemaChecking.cpp (+33-21) - (modified) clang/lib/Sema/SemaExpr.cpp (+3-2) - (modified) clang/test/CodeGen/systemz-charset.c (+2) - (modified) llvm/include/llvm/Support/TextEncoding.h (+10) - (modified) llvm/lib/Support/TextEncoding.cpp (+19) ``````````diff diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 393fe275c6269..d01afcff4095d 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -28,6 +28,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SyncScope.h" #include "clang/Basic/TypeTraits.h" +#include "clang/Lex/TextEncodingConfig.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallVector.h" @@ -2066,6 +2067,11 @@ class PredefinedExpr final return getIdentKindName(getIdentKind()); } + static std::string + ComputeNameAndTranslate(PredefinedIdentKind IK, const Decl *CurrentDecl, + TextEncodingConfig &TEC, + bool ForceElaboratedPrinting = false); + static std::string ComputeName(PredefinedIdentKind IK, const Decl *CurrentDecl, bool ForceElaboratedPrinting = false); diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index a3382e1a1d007..a24ade2d71ee9 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -19,6 +19,7 @@ #define LLVM_CLANG_AST_FORMATSTRING_H #include "clang/AST/CanonicalType.h" +#include "llvm/Support/TextEncoding.h" #include <optional> namespace clang { @@ -728,7 +729,8 @@ class FormatStringHandler { virtual bool HandleInvalidPrintfConversionSpecifier( const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) { + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) { return true; } @@ -744,10 +746,10 @@ class FormatStringHandler { // Scanf-specific handlers. - virtual bool - HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, - const char *startSpecifier, - unsigned specifierLen) { + virtual bool HandleInvalidScanfConversionSpecifier( + const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, + unsigned specifierLen, + const llvm::TextEncodingConverter &FormatStrConverter) { return true; } diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 9f7d2a17a0f8a..ec7d4fcd4d8e3 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -38,6 +38,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Error.h" +#include "llvm/Support/TextEncoding.h" #include "llvm/Support/VersionTuple.h" #include "llvm/TargetParser/Triple.h" #include <cassert> @@ -323,6 +324,8 @@ class TargetInfo : public TransferrableTargetInfo, virtual ~TargetInfo(); + llvm::TextEncodingConverter *FormatStrConverter; + /// Retrieve the target options. TargetOptions &getTargetOpts() const { assert(TargetOpts && "Missing target options"); diff --git a/clang/include/clang/Lex/TextEncodingConfig.h b/clang/include/clang/Lex/TextEncodingConfig.h index 09967a81beeed..f4ef578eb2991 100644 --- a/clang/include/clang/Lex/TextEncodingConfig.h +++ b/clang/include/clang/Lex/TextEncodingConfig.h @@ -26,7 +26,8 @@ class TextEncodingConfig { llvm::TextEncodingConverter *getConverter(ConversionAction Action) const; static std::error_code setConvertersFromOptions(TextEncodingConfig &TEC, - const clang::LangOptions &Opts); + const clang::LangOptions &Opts, + clang::TargetInfo &TInfo); llvm::StringRef getExecEncoding() { return ExecEncoding; } }; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e2bc5593efa97..8ac5cc175fd2f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -55,7 +55,7 @@ #include "clang/Basic/TemplateKinds.h" #include "clang/Basic/TokenKinds.h" #include "clang/Basic/TypeTraits.h" -#include "clang/Lex/LiteralConverter.h" +#include "clang/Lex/TextEncodingConfig.h" #include "clang/Sema/AnalysisBasedWarnings.h" #include "clang/Sema/Attr.h" #include "clang/Sema/CleanupInfo.h" diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 64d61dbc3d128..e067df4cefd7b 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -668,6 +668,20 @@ StringRef PredefinedExpr::getIdentKindName(PredefinedIdentKind IK) { llvm_unreachable("Unknown ident kind for PredefinedExpr"); } +std::string PredefinedExpr::ComputeNameAndTranslate( + PredefinedIdentKind IK, const Decl *CurrentDecl, TextEncodingConfig &TEC, + bool ForceElaboratedPrinting) { + using namespace clang::charinfo; + std::string Result = ComputeName(IK, CurrentDecl, ForceElaboratedPrinting); + llvm::TextEncodingConverter *Converter = TEC.getConverter(CA_ToExecEncoding); + if (Converter) { + SmallString<128> Converted; + Converter->convert(Result, Converted); + Result = std::string(Converted); + } + return Result; +} + // FIXME: Maybe this should use DeclPrinter with a special "print predefined // expr" policy instead. std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK, diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index 7e1ac0de6dcaf..0d449fb5f0904 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -33,8 +33,9 @@ FormatStringHandler::~FormatStringHandler() {} // scanf format strings. //===----------------------------------------------------------------------===// -OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, - const char *E) { +OptionalAmount clang::analyze_format_string::ParseAmount( + const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter) { const char *I = Beg; UpdateOnReturn<const char *> UpdateBeg(Beg, I); @@ -42,7 +43,7 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, bool hasDigits = false; for (; I != E; ++I) { - char c = *I; + char c = FormatStrConverter.convert(*I); if (c >= '0' && c <= '9') { hasDigits = true; accumulator = (accumulator * 10) + (c - '0'); @@ -60,21 +61,22 @@ OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, } OptionalAmount clang::analyze_format_string::ParseNonPositionAmount( - const char *&Beg, const char *E, unsigned &argIndex) { - if (*Beg == '*') { + const char *&Beg, const char *E, unsigned &argIndex, + const llvm::TextEncodingConverter &FormatStrConverter) { + if (FormatStrConverter.convert(*Beg) == '*') { ++Beg; return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); } - return ParseAmount(Beg, E); + return ParseAmount(Beg, E, FormatStrConverter); } OptionalAmount clang::analyze_format_string::ParsePositionAmount( FormatStringHandler &H, const char *Start, const char *&Beg, const char *E, - PositionContext p) { - if (*Beg == '*') { + PositionContext p, const llvm::TextEncodingConverter &FormatStrConverter) { + if (FormatStrConverter.convert(*Beg) == '*') { const char *I = Beg + 1; - const OptionalAmount &Amt = ParseAmount(I, E); + const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter); if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { H.HandleInvalidPosition(Beg, I - Beg, p); @@ -89,7 +91,7 @@ OptionalAmount clang::analyze_format_string::ParsePositionAmount( assert(Amt.getHowSpecified() == OptionalAmount::Constant); - if (*I == '$') { + if (FormatStrConverter.convert(*I) == '$') { // Handle positional arguments // Special case: '*0$', since this is an easy mistake. @@ -109,18 +111,21 @@ OptionalAmount clang::analyze_format_string::ParsePositionAmount( return OptionalAmount(false); } - return ParseAmount(Beg, E); + return ParseAmount(Beg, E, FormatStrConverter); } bool clang::analyze_format_string::ParseFieldWidth( FormatStringHandler &H, FormatSpecifier &CS, const char *Start, - const char *&Beg, const char *E, unsigned *argIndex) { + const char *&Beg, const char *E, unsigned *argIndex, + const llvm::TextEncodingConverter &FormatStrConverter) { // FIXME: Support negative field widths. if (argIndex) { - CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); + CS.setFieldWidth( + ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter)); } else { const OptionalAmount Amt = ParsePositionAmount( - H, Start, Beg, E, analyze_format_string::FieldWidthPos); + H, Start, Beg, E, analyze_format_string::FieldWidthPos, + FormatStrConverter); if (Amt.isInvalid()) return true; @@ -129,14 +134,13 @@ bool clang::analyze_format_string::ParseFieldWidth( return false; } -bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, - FormatSpecifier &FS, - const char *Start, - const char *&Beg, - const char *E) { +bool clang::analyze_format_string::ParseArgPosition( + FormatStringHandler &H, FormatSpecifier &FS, const char *Start, + const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter) { const char *I = Beg; - const OptionalAmount &Amt = ParseAmount(I, E); + const OptionalAmount &Amt = ParseAmount(I, E, FormatStrConverter); if (I == E) { // No more characters left? @@ -144,7 +148,8 @@ bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, return true; } - if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { + if (Amt.getHowSpecified() == OptionalAmount::Constant && + FormatStrConverter.convert(*(I++)) == '$') { // Warn that positional arguments are non-standard. H.HandlePosition(Start, I - Start); @@ -165,16 +170,15 @@ bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, return false; } -bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, - FormatSpecifier &FS, - const char *&I, - const char *E, - const LangOptions &LO) { +bool clang::analyze_format_string::ParseVectorModifier( + FormatStringHandler &H, FormatSpecifier &FS, const char *&I, const char *E, + const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter) { if (!LO.OpenCL) return false; const char *Start = I; - if (*I == 'v') { + if (FormatStrConverter.convert(*I) == 'v') { ++I; if (I == E) { @@ -182,7 +186,7 @@ bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, return true; } - OptionalAmount NumElts = ParseAmount(I, E); + OptionalAmount NumElts = ParseAmount(I, E, FormatStrConverter); if (NumElts.getHowSpecified() != OptionalAmount::Constant) { H.HandleIncompleteSpecifier(Start, E - Start); return true; @@ -194,22 +198,20 @@ bool clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, return false; } -bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, - const char *&I, - const char *E, - const LangOptions &LO, - bool IsScanf) { +bool clang::analyze_format_string::ParseLengthModifier( + FormatSpecifier &FS, const char *&I, const char *E, const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter, bool IsScanf) { LengthModifier::Kind lmKind = LengthModifier::None; const char *lmPosition = I; - switch (*I) { + switch (FormatStrConverter.convert(*I)) { default: return false; case 'h': ++I; - if (I != E && *I == 'h') { + if (I != E && FormatStrConverter.convert(*I) == 'h') { ++I; lmKind = LengthModifier::AsChar; - } else if (I != E && *I == 'l' && LO.OpenCL) { + } else if (I != E && FormatStrConverter.convert(*I) == 'l' && LO.OpenCL) { ++I; lmKind = LengthModifier::AsShortLong; } else { @@ -218,7 +220,7 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, break; case 'l': ++I; - if (I != E && *I == 'l') { + if (I != E && FormatStrConverter.convert(*I) == 'l') { ++I; lmKind = LengthModifier::AsLongLong; } else { @@ -251,7 +253,9 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, // be parsed as the GNU extension 'a' length modifier. If not, this // will be parsed as a conversion specifier. ++I; - if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { + if (I != E && (FormatStrConverter.convert(*I) == 's' || + FormatStrConverter.convert(*I) == 'S' || + FormatStrConverter.convert(*I) == '[')) { lmKind = LengthModifier::AsAllocate; break; } @@ -269,7 +273,8 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, // scanf: AsInt64 case 'I': if (I + 1 != E && I + 2 != E) { - if (I[1] == '6' && I[2] == '4') { + if (FormatStrConverter.convert(I[1]) == '6' && + FormatStrConverter.convert(I[2]) == '4') { I += 3; lmKind = LengthModifier::AsInt64; break; @@ -277,7 +282,8 @@ bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, if (IsScanf) return false; - if (I[1] == '3' && I[2] == '2') { + if (FormatStrConverter.convert(I[1]) == '3' && + FormatStrConverter.convert(I[2]) == '2') { I += 3; lmKind = LengthModifier::AsInt32; break; diff --git a/clang/lib/AST/FormatStringParsing.h b/clang/lib/AST/FormatStringParsing.h index 401528481a9d6..531bc291e0b5b 100644 --- a/clang/lib/AST/FormatStringParsing.h +++ b/clang/lib/AST/FormatStringParsing.h @@ -35,29 +35,43 @@ template <typename T> class UpdateOnReturn { namespace analyze_format_string { -OptionalAmount ParseAmount(const char *&Beg, const char *E); -OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, - unsigned &argIndex); +OptionalAmount +ParseAmount(const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter); -OptionalAmount ParsePositionAmount(FormatStringHandler &H, const char *Start, - const char *&Beg, const char *E, - PositionContext p); +OptionalAmount +ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex, + const llvm::TextEncodingConverter &FormatStrConverter); + +OptionalAmount +ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg, + const char *E, PositionContext p, + const llvm::TextEncodingConverter &FormatStrConverter); + +OptionalAmount +ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg, + const char *E, PositionContext p, + const llvm::TextEncodingConverter &FormatStrConverter); bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E, - unsigned *argIndex); + unsigned *argIndex, + const llvm::TextEncodingConverter &FormatStrConverter); bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, - const char *Start, const char *&Beg, const char *E); + const char *Start, const char *&Beg, const char *E, + const llvm::TextEncodingConverter &FormatStrConverter); bool ParseVectorModifier(FormatStringHandler &H, FormatSpecifier &FS, - const char *&Beg, const char *E, - const LangOptions &LO); + const char *&Beg, const char *E, const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter); /// Returns true if a LengthModifier was parsed and installed in the /// FormatSpecifier& argument, and false otherwise. bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, - const LangOptions &LO, bool IsScanf = false); + const LangOptions &LO, + const llvm::TextEncodingConverter &FormatStrConverter, + bool IsScanf = false); /// Returns true if the invalid specifier in \p SpecifierBegin is a UTF-8 /// string; check that it won't go further than \p FmtStrEnd and write diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index 6610a2de9e083..7efcc554ec136 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -35,14 +35,17 @@ typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> using analyze_format_string::ParseNonPositionAmount; -static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, - const char *Start, const char *&Beg, const char *E, - unsigned *argIndex) { +static bool +ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, const char *Start, + const char *&Beg, const char *E, unsigned *argIndex, + const llvm::TextEncodingConverter &FormatStrConverter) { if (argIndex) { - FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); + FS.setPrecision( + ParseNonPositionAmount(Beg, E, *argIndex, FormatStrConverter)); } else { const OptionalAmount Amt = ParsePositionAmount( - H, Start, Beg, E, analyze_format_string::PrecisionPos); + H, Start, Beg, E, analyze_format_string::PrecisionPos, + FormatStrConverter); if (Amt.isInvalid()) return true; FS.setPrecision(Amt); @@ -50,11 +53,14 @@ static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, return false; } -static bool ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, - const char *FlagBeg, const char *E, bool Warn) { +static bool +ParseObjCFlags(FormatStringHandler &H, PrintfSpecifier &FS, const char *FlagBeg, + const char *E, bool Warn, + const llvm::TextEncodingConverter &FormatStrConverter) { StringRef Flag(FlagBeg, E - FlagBeg); // Currently there is only one flag. - if (Flag == "tt") { + if (Flag.size() == 2 && FormatStrConverter.convert(FlagBeg[0]) == 't' && + FormatStrConverter.convert(FlagBeg[1]) == 't') { FS.setHasObjCTechnicalTerm(FlagBeg); return false; } @@ -81,6 +87,8 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, const char *Start = nullptr; UpdateOnReturn<const char *> UpdateBeg(Beg, I); + const llvm::TextEncodingConverter &FormatStrConverter = + *Target.FormatStrConverter; // Look for a '%' character that indicates the start of a format specifier. for (; I != E; ++I) { char c = *I; @@ -89,7 +97,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, H.HandleNullChar(I); return true; } - if (c == '%') { + if (FormatStrConverter.convert(c) == '%') { Start = I++; // Record the start of the format specifier. break; } @@ -107,7 +115,7 @@ ParsePrintfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, } PrintfSpecifier FS; - if (ParseArgPosition(H, FS, Start, I, E)) + if (ParseArgPosition(H, FS, Start, I, E, FormatStrConverter)) return true; if (I == E) { @@ -117,13 +125,17 @@ ParsePrintfSpecifier(FormatStrin... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/196568 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
