https://github.com/nikic created https://github.com/llvm/llvm-project/pull/159420
None >From 0dca43897ece415a59345b11dd86854d60a3a640 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Fri, 12 Sep 2025 09:11:08 +0200 Subject: [PATCH 1/2] [MC] Add parseSymbol() helper (NFC) (#158106) This combines parseIdentifier() + getOrCreateSymbol(). This should make it a bit easier if we want to change the parseIdentifier() API. (cherry picked from commit 76aba5d415fbf206e0d9443a5822fcd9244fa33f) --- llvm/include/llvm/MC/MCParser/MCAsmParser.h | 3 + llvm/lib/MC/MCParser/AsmParser.cpp | 53 ++++++----------- llvm/lib/MC/MCParser/COFFAsmParser.cpp | 66 +++++++-------------- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 13 ++-- llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 44 +++++--------- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 32 ++++------ llvm/lib/MC/MCParser/MCAsmParser.cpp | 9 +++ llvm/lib/MC/MCParser/MasmParser.cpp | 19 +++--- llvm/lib/MC/MCParser/WasmAsmParser.cpp | 10 ++-- 9 files changed, 96 insertions(+), 153 deletions(-) diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index cb9bd5c600d52..e3f44a08db641 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -279,6 +279,9 @@ class LLVM_ABI MCAsmParser { /// Res to the identifier contents. virtual bool parseIdentifier(StringRef &Res) = 0; + /// Parse identifier and get or create symbol for it. + bool parseSymbol(MCSymbol *&Res); + /// Parse up to the end of statement and return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. 
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 9fd6c05a846db..cea781f9ec36e 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3897,20 +3897,15 @@ bool AsmParser::parseDirectiveCVLoc() { /// ::= .cv_linetable FunctionId, FnStart, FnEnd bool AsmParser::parseDirectiveCVLinetable() { int64_t FunctionId; - StringRef FnStartName, FnEndName; + MCSymbol *FnStartSym, *FnEndSym; SMLoc Loc = getTok().getLoc(); if (parseCVFunctionId(FunctionId, ".cv_linetable") || parseComma() || parseTokenLoc(Loc) || - check(parseIdentifier(FnStartName), Loc, - "expected identifier in directive") || + check(parseSymbol(FnStartSym), Loc, "expected identifier in directive") || parseComma() || parseTokenLoc(Loc) || - check(parseIdentifier(FnEndName), Loc, - "expected identifier in directive")) + check(parseSymbol(FnEndSym), Loc, "expected identifier in directive")) return true; - MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName); - MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName); - getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym); return false; } @@ -3919,7 +3914,7 @@ bool AsmParser::parseDirectiveCVLinetable() { /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd bool AsmParser::parseDirectiveCVInlineLinetable() { int64_t PrimaryFunctionId, SourceFileId, SourceLineNum; - StringRef FnStartName, FnEndName; + MCSymbol *FnStartSym, *FnEndSym; SMLoc Loc = getTok().getLoc(); if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") || parseTokenLoc(Loc) || @@ -3929,16 +3924,14 @@ bool AsmParser::parseDirectiveCVInlineLinetable() { parseIntToken(SourceLineNum, "expected SourceLineNum") || check(SourceLineNum < 0, Loc, "Line number less than zero") || parseTokenLoc(Loc) || - check(parseIdentifier(FnStartName), Loc, "expected identifier") || + check(parseSymbol(FnStartSym), Loc, "expected identifier") || parseTokenLoc(Loc) || - 
check(parseIdentifier(FnEndName), Loc, "expected identifier")) + check(parseSymbol(FnEndSym), Loc, "expected identifier")) return true; if (parseEOL()) return true; - MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName); - MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName); getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym); @@ -3959,16 +3952,14 @@ bool AsmParser::parseDirectiveCVDefRange() { std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges; while (getLexer().is(AsmToken::Identifier)) { Loc = getLexer().getLoc(); - StringRef GapStartName; - if (parseIdentifier(GapStartName)) + MCSymbol *GapStartSym; + if (parseSymbol(GapStartSym)) return Error(Loc, "expected identifier in directive"); - MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName); Loc = getLexer().getLoc(); - StringRef GapEndName; - if (parseIdentifier(GapEndName)) + MCSymbol *GapEndSym; + if (parseSymbol(GapEndSym)) return Error(Loc, "expected identifier in directive"); - MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName); Ranges.push_back({GapStartSym, GapEndSym}); } @@ -4105,12 +4096,11 @@ bool AsmParser::parseDirectiveCVFileChecksumOffset() { /// ::= .cv_fpo_data procsym bool AsmParser::parseDirectiveCVFPOData() { SMLoc DirLoc = getLexer().getLoc(); - StringRef ProcName; - if (parseIdentifier(ProcName)) + MCSymbol *ProcSym; + if (parseSymbol(ProcSym)) return TokError("expected symbol name"); if (parseEOL()) return true; - MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); getStreamer().emitCVFPOData(ProcSym, DirLoc); return false; } @@ -4329,15 +4319,12 @@ bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { if (Encoding == dwarf::DW_EH_PE_omit) return false; - StringRef Name; + MCSymbol *Sym; if (check(!isValidEncoding(Encoding), "unsupported encoding.") || parseComma() || - check(parseIdentifier(Name), "expected identifier in 
directive") || - parseEOL()) + check(parseSymbol(Sym), "expected identifier in directive") || parseEOL()) return true; - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (IsPersonality) getStreamer().emitCFIPersonality(Sym, Encoding); else @@ -4988,13 +4975,10 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) { return true; SMLoc IDLoc = getLexer().getLoc(); - StringRef Name; - if (parseIdentifier(Name)) + MCSymbol *Sym; + if (parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (parseComma()) return true; @@ -5827,10 +5811,9 @@ bool AsmParser::parseDirectiveAddrsig() { } bool AsmParser::parseDirectiveAddrsigSym() { - StringRef Name; - if (check(parseIdentifier(Name), "expected identifier") || parseEOL()) + MCSymbol *Sym; + if (check(parseSymbol(Sym), "expected identifier") || parseEOL()) return true; - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitAddrsigSym(Sym); return false; } diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp index 9fb17488a9e9c..5dd79946d8779 100644 --- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -293,13 +293,11 @@ bool COFFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); if (getLexer().isNot(AsmToken::EndOfStatement)) { while (true) { - StringRef Name; + MCSymbol *Sym; - if (getParser().parseIdentifier(Name)) + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - getStreamer().emitSymbolAttribute(Sym, Attr); if (getLexer().is(AsmToken::EndOfStatement)) @@ -450,13 +448,11 @@ bool COFFAsmParser::parseDirectivePopSection(StringRef, SMLoc) { } bool COFFAsmParser::parseDirectiveDef(StringRef, SMLoc) { - StringRef 
SymbolName; + MCSymbol *Sym; - if (getParser().parseIdentifier(SymbolName)) + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName); - getStreamer().beginCOFFSymbolDef(Sym); Lex(); @@ -496,8 +492,8 @@ bool COFFAsmParser::parseDirectiveEndef(StringRef, SMLoc) { } bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); int64_t Offset = 0; @@ -517,8 +513,6 @@ bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) { "invalid '.secrel32' directive offset, can't be less " "than zero or greater than std::numeric_limits<uint32_t>::max()"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSecRel32(Symbol, Offset); return false; @@ -526,8 +520,8 @@ bool COFFAsmParser::parseDirectiveSecRel32(StringRef, SMLoc) { bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) { auto parseOp = [&]() -> bool { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); int64_t Offset = 0; @@ -544,8 +538,6 @@ bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) { "than -2147483648 or greater than " "2147483647"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - getStreamer().emitCOFFImgRel32(Symbol, Offset); return false; }; @@ -556,75 +548,65 @@ bool COFFAsmParser::parseDirectiveRVA(StringRef, SMLoc) { } bool COFFAsmParser::parseDirectiveSafeSEH(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in 
directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSafeSEH(Symbol); return false; } bool COFFAsmParser::parseDirectiveSecIdx(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSectionIndex(Symbol); return false; } bool COFFAsmParser::parseDirectiveSymIdx(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSymbolIndex(Symbol); return false; } bool COFFAsmParser::parseDirectiveSecNum(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSecNumber(Symbol); return false; } bool COFFAsmParser::parseDirectiveSecOffset(StringRef, SMLoc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitCOFFSecOffset(Symbol); 
return false; @@ -679,15 +661,13 @@ bool COFFAsmParser::parseDirectiveLinkOnce(StringRef, SMLoc Loc) { } bool COFFAsmParser::parseSEHDirectiveStartProc(StringRef, SMLoc Loc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *Symbol; + if (getParser().parseSymbol(Symbol)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitWinCFIStartProc(Symbol, Loc); return false; @@ -718,8 +698,8 @@ bool COFFAsmParser::parseSEHDirectiveEndChained(StringRef, SMLoc Loc) { } bool COFFAsmParser::parseSEHDirectiveHandler(StringRef, SMLoc Loc) { - StringRef SymbolID; - if (getParser().parseIdentifier(SymbolID)) + MCSymbol *handler; + if (getParser().parseSymbol(handler)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -736,8 +716,6 @@ bool COFFAsmParser::parseSEHDirectiveHandler(StringRef, SMLoc Loc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID); - Lex(); getStreamer().emitWinEHHandler(handler, unwind, except, Loc); return false; diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 282f22fd33609..7e7c01cb4d73a 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -444,8 +444,8 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) { if (!getStreamer().getCurrentFragment()) return Error(getTok().getLoc(), "expected section directive"); - StringRef Label; - if (getParser().parseIdentifier(Label)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return Error(Loc, "expected identifier for procedure"); if (getLexer().is(AsmToken::Identifier)) { StringRef nextVal = getTok().getString(); @@ -460,11 +460,12 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc 
Loc) { nextLoc = getTok().getLoc(); } } - MCSymbolCOFF *Sym = cast<MCSymbolCOFF>(getContext().getOrCreateSymbol(Label)); // Define symbol as simple external function - Sym->setExternal(true); - Sym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); + auto *COFFSym = cast<MCSymbolCOFF>(Sym); + COFFSym->setExternal(true); + COFFSym->setType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); bool Framed = false; if (getLexer().is(AsmToken::Identifier) && @@ -475,7 +476,7 @@ bool COFFMasmParser::parseDirectiveProc(StringRef Directive, SMLoc Loc) { } getStreamer().emitLabel(Sym, Loc); - CurrentProcedures.push_back(Label); + CurrentProcedures.push_back(Sym->getName()); CurrentProceduresFramed.push_back(Framed); return false; } diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp index a9095b3298f5e..fceb718d091c9 100644 --- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -501,13 +501,10 @@ bool DarwinAsmParser::parseSectionSwitch(StringRef Segment, StringRef Section, /// parseDirectiveAltEntry /// ::= .alt_entry identifier bool DarwinAsmParser::parseDirectiveAltEntry(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Look up symbol. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (Sym->isDefined()) return TokError(".alt_entry must preceed symbol definition"); @@ -521,13 +518,10 @@ bool DarwinAsmParser::parseDirectiveAltEntry(StringRef, SMLoc) { /// parseDirectiveDesc /// ::= .desc identifier , expression bool DarwinAsmParser::parseDirectiveDesc(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.desc' directive"); Lex(); @@ -560,18 +554,17 @@ bool DarwinAsmParser::parseDirectiveIndirectSymbol(StringRef, SMLoc Loc) { return Error(Loc, "indirect symbol not in a symbol pointer or stub " "section"); - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in .indirect_symbol directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - // Assembler local symbols don't make any sense here. Complain loudly. if (Sym->isTemporary()) return TokError("non-local symbol required in directive"); if (!getStreamer().emitSymbolAttribute(Sym, MCSA_IndirectSymbol)) - return TokError("unable to emit indirect symbol attribute for: " + Name); + return TokError("unable to emit indirect symbol attribute for: " + + Sym->getName()); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.indirect_symbol' directive"); @@ -633,13 +626,10 @@ bool DarwinAsmParser::parseDirectiveLinkerOption(StringRef IDVal, SMLoc) { /// parseDirectiveLsym /// ::= .lsym identifier , expression bool DarwinAsmParser::parseDirectiveLsym(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.lsym' directive"); Lex(); @@ -826,13 +816,10 @@ bool DarwinAsmParser::parseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { /// ::= .tbss identifier, size, align bool DarwinAsmParser::parseDirectiveTBSS(StringRef, SMLoc) { SMLoc IDLoc = getLexer().getLoc(); - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); @@ -911,13 +898,10 @@ bool DarwinAsmParser::parseDirectiveZerofill(StringRef, SMLoc) { Lex(); SMLoc IDLoc = getLexer().getLoc(); - StringRef IDStr; - if (getParser().parseIdentifier(IDStr)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - // handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(IDStr); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index ec8b40261a6ca..6d58e31996d39 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -197,10 +197,9 @@ bool ELFAsmParser::parseSectionSwitch(StringRef Section, unsigned Type, } bool ELFAsmParser::parseDirectiveSize(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier"); - MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name)); if (getLexer().isNot(AsmToken::Comma)) return TokError("expected comma"); @@ -711,13 +710,10 @@ static MCSymbolAttr MCAttrForString(StringRef Type) { /// ::= .type identifier , %attribute /// ::= .type identifier , "attribute" bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - bool AllowAt = getLexer().getAllowAtInIdentifier(); if (!AllowAt && !getContext().getAsmInfo()->getCommentString().starts_with("@")) @@ -789,8 +785,9 @@ bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) { /// parseDirectiveSymver /// ::= .symver foo, bar2@zed bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) { - StringRef OriginalName, Name, Action; - if (getParser().parseIdentifier(OriginalName)) + MCSymbol *OriginalSym; + StringRef Name, Action; + if (getParser().parseSymbol(OriginalSym)) return TokError("expected identifier"); if (getLexer().isNot(AsmToken::Comma)) @@ -818,8 +815,7 @@ bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) { } (void)parseOptionalToken(AsmToken::EndOfStatement); - getStreamer().emitELFSymverDirective( - getContext().getOrCreateSymbol(OriginalName), Name, KeepOriginalSym); + getStreamer().emitELFSymverDirective(OriginalSym, Name, KeepOriginalSym); return false; } @@ -852,8 +848,8 @@ bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) { bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) { // FIXME: Share code with the other alias building directives. 
- StringRef AliasName; - if (getParser().parseIdentifier(AliasName)) + MCSymbol *Alias; + if (getParser().parseSymbol(Alias)) return TokError("expected identifier"); if (getLexer().isNot(AsmToken::Comma)) @@ -861,14 +857,10 @@ bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) { Lex(); - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier"); - MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); - - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - getStreamer().emitWeakReference(Alias, Sym); return false; } diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp index 68b9cab2492f5..3721541c71e11 100644 --- a/llvm/lib/MC/MCParser/MCAsmParser.cpp +++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp @@ -163,6 +163,15 @@ bool MCAsmParser::parseGNUAttribute(SMLoc L, int64_t &Tag, return true; } +bool MCAsmParser::parseSymbol(MCSymbol *&Res) { + StringRef Name; + if (parseIdentifier(Name)) + return true; + + Res = getContext().getOrCreateSymbol(Name); + return false; +} + void MCParsedAsmOperand::dump() const { // Cannot completely remove virtual function even in release mode. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index f4684e64e8628..e91f2f507022c 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -4504,9 +4504,9 @@ bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) { bool MasmParser::parseDirectiveExtern() { // .extern is the default - but we still need to take any provided type info. 
auto parseOp = [&]() -> bool { - StringRef Name; + MCSymbol *Sym; SMLoc NameLoc = getTok().getLoc(); - if (parseIdentifier(Name)) + if (parseSymbol(Sym)) return Error(NameLoc, "expected name"); if (parseToken(AsmToken::Colon)) return true; @@ -4519,10 +4519,9 @@ bool MasmParser::parseDirectiveExtern() { AsmTypeInfo Type; if (lookUpType(TypeName, Type)) return Error(TypeLoc, "unrecognized type"); - KnownType[Name.lower()] = Type; + KnownType[Sym->getName().lower()] = Type; } - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); Sym->setExternal(true); getStreamer().emitSymbolAttribute(Sym, MCSA_Extern); @@ -4538,11 +4537,10 @@ bool MasmParser::parseDirectiveExtern() { /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { auto parseOp = [&]() -> bool { - StringRef Name; SMLoc Loc = getTok().getLoc(); - if (parseIdentifier(Name)) + MCSymbol *Sym; + if (parseSymbol(Sym)) return Error(Loc, "expected identifier"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); // Assembler local symbols don't make any sense here. Complain loudly. if (Sym->isTemporary()) @@ -4565,13 +4563,10 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) { return true; SMLoc IDLoc = getLexer().getLoc(); - StringRef Name; - if (parseIdentifier(Name)) + MCSymbol *Sym; + if (parseSymbol(Sym)) return TokError("expected identifier in directive"); - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp index 1f824b80bcd4b..f2477e04302a0 100644 --- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp +++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp @@ -213,10 +213,9 @@ class WasmAsmParser : public MCAsmParserExtension { // TODO: This function is almost the same as ELFAsmParser::ParseDirectiveSize // so maybe could be shared somehow. bool parseDirectiveSize(StringRef, SMLoc Loc) { - StringRef Name; - if (Parser->parseIdentifier(Name)) + MCSymbol *Sym; + if (Parser->parseSymbol(Sym)) return TokError("expected identifier in directive"); - auto Sym = getContext().getOrCreateSymbol(Name); if (expect(AsmToken::Comma, ",")) return true; const MCExpr *Expr; @@ -294,10 +293,9 @@ class WasmAsmParser : public MCAsmParserExtension { assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!"); if (getLexer().isNot(AsmToken::EndOfStatement)) { while (true) { - StringRef Name; - if (getParser().parseIdentifier(Name)) + MCSymbol *Sym; + if (getParser().parseSymbol(Sym)) return TokError("expected identifier in directive"); - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitSymbolAttribute(Sym, Attr); if (getLexer().is(AsmToken::EndOfStatement)) break; >From 9d99d0ffba42840f2dfb534db949a7b69db92066 Mon Sep 17 00:00:00 2001 From: Fangrui Song <i...@maskray.me> Date: Tue, 16 Sep 2025 09:42:09 -0700 Subject: [PATCH 2/2] MC: Better handle backslash-escaped symbols (#158780) The MCContext::getOrCreateSymbol change in #138817 was a workaround. With #158106, we can replace `getOrCreateSymbol` with `parseSymbol`, in llvm/lib/MC/MCParser to handle backslash-escaped symbols. 
(cherry picked from commit 0cf668889823e7dc526b0b3039c22452f61538f2) --- llvm/include/llvm/MC/MCContext.h | 4 ++ llvm/lib/MC/MCContext.cpp | 49 +++++++++++-------- llvm/lib/MC/MCParser/AsmParser.cpp | 12 ++--- llvm/lib/MC/MCParser/COFFMasmParser.cpp | 4 +- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 2 +- llvm/lib/MC/MCParser/MCAsmParser.cpp | 2 +- llvm/lib/MC/MCParser/MCAsmParserExtension.cpp | 4 +- llvm/lib/MC/MCParser/MasmParser.cpp | 13 +++-- llvm/lib/MC/MCParser/WasmAsmParser.cpp | 5 +- llvm/test/CodeGen/X86/symbol-name.ll | 5 ++ llvm/test/MC/ELF/cgprofile.s | 10 ++-- llvm/test/MC/ELF/symbol-names.s | 3 ++ 12 files changed, 65 insertions(+), 48 deletions(-) create mode 100644 llvm/test/CodeGen/X86/symbol-name.ll diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 5a8ec17dae1cc..22a493a8df013 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -489,6 +489,10 @@ class MCContext { /// \param Name - The symbol name, which must be unique across all symbols. LLVM_ABI MCSymbol *getOrCreateSymbol(const Twine &Name); + /// Variant of getOrCreateSymbol that handles backslash-escaped symbols. + /// For example, parse "a\"b\\" as a"b\. + LLVM_ABI MCSymbol *parseSymbol(const Twine &Name); + /// Gets a symbol that will be defined to the final stack offset of a local /// variable after codegen. /// diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 890184db1d1ef..1625455e38e06 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -217,27 +217,6 @@ MCDataFragment *MCContext::allocInitialFragment(MCSection &Sec) { MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) { SmallString<128> NameSV; StringRef NameRef = Name.toStringRef(NameSV); - if (NameRef.contains('\\')) { - NameSV = NameRef; - size_t S = 0; - // Support escaped \\ and \" as in GNU Assembler. 
GAS issues a warning for - // other characters following \\, which we do not implement due to code - // structure. - for (size_t I = 0, E = NameSV.size(); I != E; ++I) { - char C = NameSV[I]; - if (C == '\\' && I + 1 != E) { - switch (NameSV[I + 1]) { - case '"': - case '\\': - C = NameSV[++I]; - break; - } - } - NameSV[S++] = C; - } - NameSV.resize(S); - NameRef = NameSV; - } assert(!NameRef.empty() && "Normal symbols cannot be unnamed!"); @@ -258,6 +237,34 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) { return Entry.second.Symbol; } +MCSymbol *MCContext::parseSymbol(const Twine &Name) { + SmallString<128> SV; + StringRef NameRef = Name.toStringRef(SV); + if (NameRef.contains('\\')) { + SV = NameRef; + size_t S = 0; + // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for + // other characters following \\, which we do not implement due to code + // structure. + for (size_t I = 0, E = SV.size(); I != E; ++I) { + char C = SV[I]; + if (C == '\\' && I + 1 != E) { + switch (SV[I + 1]) { + case '"': + case '\\': + C = SV[++I]; + break; + } + } + SV[S++] = C; + } + SV.resize(S); + NameRef = SV; + } + + return getOrCreateSymbol(NameRef); +} + MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName, unsigned Idx) { return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName + diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index cea781f9ec36e..929051c4f0c17 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -1222,8 +1222,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); if (!Sym) - Sym = getContext().getOrCreateSymbol(MAI.isHLASM() ? SymbolName.upper() - : SymbolName); + Sym = getContext().parseSymbol(MAI.isHLASM() ? 
SymbolName.upper() + : SymbolName); // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. @@ -1854,7 +1854,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, RewrittenLabel); IDVal = RewrittenLabel; } - Sym = getContext().getOrCreateSymbol(IDVal); + Sym = getContext().parseSymbol(IDVal); } else Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal); // End of Labels should be treated as end of line for lexing @@ -4953,7 +4953,7 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { if (discardLTOSymbol(Name)) return false; - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbol *Sym = getContext().parseSymbol(Name); // Assembler local symbols don't make any sense here, except for directives // that the symbol should be tagged. @@ -6213,7 +6213,7 @@ bool HLASMAsmParser::parseAsHLASMLabel(ParseStatementInfo &Info, return Error(LabelLoc, "Cannot have just a label for an HLASM inline asm statement"); - MCSymbol *Sym = getContext().getOrCreateSymbol( + MCSymbol *Sym = getContext().parseSymbol( getContext().getAsmInfo()->isHLASM() ? LabelVal.upper() : LabelVal); // Emit the label. 
@@ -6340,7 +6340,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc); return false; } else - Sym = Parser.getContext().getOrCreateSymbol(Name); + Sym = Parser.getContext().parseSymbol(Name); Sym->setRedefinable(allow_redef); diff --git a/llvm/lib/MC/MCParser/COFFMasmParser.cpp b/llvm/lib/MC/MCParser/COFFMasmParser.cpp index 7e7c01cb4d73a..b855a765e8d26 100644 --- a/llvm/lib/MC/MCParser/COFFMasmParser.cpp +++ b/llvm/lib/MC/MCParser/COFFMasmParser.cpp @@ -511,8 +511,8 @@ bool COFFMasmParser::parseDirectiveAlias(StringRef Directive, SMLoc Loc) { getParser().parseAngleBracketString(ActualName)) return Error(getTok().getLoc(), "expected <actualName>"); - MCSymbol *Alias = getContext().getOrCreateSymbol(AliasName); - MCSymbol *Actual = getContext().getOrCreateSymbol(ActualName); + MCSymbol *Alias = getContext().parseSymbol(AliasName); + MCSymbol *Actual = getContext().parseSymbol(ActualName); getStreamer().emitWeakReference(Alias, Actual); diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 6d58e31996d39..b04ec64517721 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -164,7 +164,7 @@ bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { continue; } - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbol *Sym = getContext().parseSymbol(Name); getStreamer().emitSymbolAttribute(Sym, Attr); diff --git a/llvm/lib/MC/MCParser/MCAsmParser.cpp b/llvm/lib/MC/MCParser/MCAsmParser.cpp index 3721541c71e11..c1b7e57184de1 100644 --- a/llvm/lib/MC/MCParser/MCAsmParser.cpp +++ b/llvm/lib/MC/MCParser/MCAsmParser.cpp @@ -168,7 +168,7 @@ bool MCAsmParser::parseSymbol(MCSymbol *&Res) { if (parseIdentifier(Name)) return true; - Res = getContext().getOrCreateSymbol(Name); + Res = getContext().parseSymbol(Name); return false; } diff --git a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp 
b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp index 7fa05088c9725..299d4b46a8a84 100644 --- a/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp +++ b/llvm/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -50,8 +50,8 @@ bool MCAsmParserExtension::parseDirectiveCGProfile(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - MCSymbol *FromSym = getContext().getOrCreateSymbol(From); - MCSymbol *ToSym = getContext().getOrCreateSymbol(To); + MCSymbol *FromSym = getContext().parseSymbol(From); + MCSymbol *ToSym = getContext().parseSymbol(To); getStreamer().emitCGProfileEntry( MCSymbolRefExpr::create(FromSym, getContext(), FromLoc), diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index e91f2f507022c..3730c63b04233 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1480,7 +1480,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, auto VarIt = Variables.find(SymbolName.lower()); if (VarIt != Variables.end()) SymbolName = VarIt->second.Name; - Sym = getContext().getOrCreateSymbol(SymbolName); + Sym = getContext().parseSymbol(SymbolName); } // If this is an absolute variable reference, substitute it now to preserve @@ -1965,7 +1965,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, if (IDVal == "@@") { Sym = Ctx.createDirectionalLocalSymbol(0); } else { - Sym = getContext().getOrCreateSymbol(IDVal); + Sym = getContext().parseSymbol(IDVal); } // End of Labels should be treated as end of line for lexing @@ -3009,8 +3009,7 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, return false; } - MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); - + auto *Sym = getContext().parseSymbol(Var.Name); const MCConstantExpr *PrevValue = Sym->isVariable() ? 
dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue()) @@ -3318,7 +3317,7 @@ bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size, StringRef Name, SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbol *Sym = getContext().parseSymbol(Name); getStreamer().emitLabel(Sym); unsigned Count; if (emitIntegralValues(Size, &Count)) @@ -3509,7 +3508,7 @@ bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName, SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbol *Sym = getContext().parseSymbol(Name); getStreamer().emitLabel(Sym); unsigned Count; if (emitRealValues(Semantics, &Count)) @@ -4003,7 +4002,7 @@ bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure, SMLoc DirLoc, StringRef Name) { if (StructInProgress.empty()) { // Initialize named data value. - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + MCSymbol *Sym = getContext().parseSymbol(Name); getStreamer().emitLabel(Sym); unsigned Count; if (emitStructValues(Structure, &Count)) diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp index f2477e04302a0..61534c6436ac6 100644 --- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp +++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp @@ -240,9 +240,8 @@ class WasmAsmParser : public MCAsmParserExtension { if (!Lexer->is(AsmToken::Identifier)) return error("Expected label after .type directive, got: ", Lexer->getTok()); - auto WasmSym = cast<MCSymbolWasm>( - getStreamer().getContext().getOrCreateSymbol( - Lexer->getTok().getString())); + auto *WasmSym = cast<MCSymbolWasm>( + getStreamer().getContext().parseSymbol(Lexer->getTok().getString())); Lex(); if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) && Lexer->is(AsmToken::Identifier))) diff --git a/llvm/test/CodeGen/X86/symbol-name.ll 
b/llvm/test/CodeGen/X86/symbol-name.ll new file mode 100644 index 0000000000000..dd9be14fb053e --- /dev/null +++ b/llvm/test/CodeGen/X86/symbol-name.ll @@ -0,0 +1,5 @@ +; RUN: llc < %s -mtriple=x86_64 -relocation-model=pic | FileCheck %s + +; CHECK: .globl "\\\"" +; CHECK-NEXT: "\\\"": +@"\\\22" = constant i8 0 diff --git a/llvm/test/MC/ELF/cgprofile.s b/llvm/test/MC/ELF/cgprofile.s index f8469ddc68877..28d8b72185556 100644 --- a/llvm/test/MC/ELF/cgprofile.s +++ b/llvm/test/MC/ELF/cgprofile.s @@ -5,11 +5,11 @@ a: .word b .cg_profile a, b, 32 .cg_profile freq, a, 11 - .cg_profile late, late2, 20 + .cg_profile "late\\", late2, 20 .cg_profile .L.local, b, 42 - .globl late -late: + .globl "late\\" +"late\\": late2: .word 0 late3: .L.local: @@ -31,7 +31,7 @@ late3: # CHECK-NEXT: 0010: 14000000 00000000 2A000000 00000000 # CHECK-NEXT: ) -# CHECK: Name: .rel.llvm.call-graph-profile (28) +# CHECK: Name: .rel.llvm.call-graph-profile # CHECK-NEXT: Type: SHT_REL (0x9) # CHECK-NEXT: Flags [ (0x40) # CHECK-NEXT: SHF_INFO_LINK @@ -83,7 +83,7 @@ late3: # CHECK-NEXT: Type: # CHECK-NEXT: Other: # CHECK-NEXT: Section: Undefined -# CHECK: Name: late +# CHECK: Name: late\ ([[#]]) # CHECK-NEXT: Value: # CHECK-NEXT: Size: # CHECK-NEXT: Binding: Global diff --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s index 427187c329acf..f1593dd2f8099 100644 --- a/llvm/test/MC/ELF/symbol-names.s +++ b/llvm/test/MC/ELF/symbol-names.s @@ -5,6 +5,7 @@ // CHECK-LABEL:SYMBOL TABLE: // CHECK-NEXT: 0000000000000001 l F .text 0000000000000000 a"b\{{$}} // CHECK-NEXT: 0000000000000006 l .text 0000000000000000 a\{{$}} +// CHECK-NEXT: 000000000000000b l .text 0000000000000000 a\\{{$}} // CHECK-NEXT: 0000000000000000 g F .text 0000000000000000 foo?bar // CHECK-NEXT: 0000000000000000 *UND* 0000000000000000 a"b\q{{$}} // CHECK-EMPTY: @@ -26,3 +27,5 @@ ret "a\\": /// GAS emits a warning for \q call "a\"b\q" + +"a\\\\" = . 
_______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits