[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/119983 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)
https://github.com/thurstond approved this pull request. https://github.com/llvm/llvm-project/pull/120038 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
https://github.com/kovdan01 updated https://github.com/llvm/llvm-project/pull/113817 >From f2dc47f188eb68bb53bb60d85d3d617bcf90d823 Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Fri, 25 Oct 2024 12:32:27 +0300 Subject: [PATCH 01/10] [PAC][lld][AArch64][ELF] Support signed TLSDESC Support `R_AARCH64_AUTH_TLSDESC_ADR_PAGE21`, `R_AARCH64_AUTH_TLSDESC_LD64_LO12` and `R_AARCH64_AUTH_TLSDESC_ADD_LO12` static TLSDESC relocations. --- lld/ELF/Arch/AArch64.cpp | 8 ++ lld/ELF/InputSection.cpp | 2 + lld/ELF/Relocations.cpp | 38 +++- lld/ELF/Relocations.h| 2 + lld/ELF/Symbols.h| 1 + lld/ELF/SyntheticSections.cpp| 5 + lld/test/ELF/aarch64-tlsdesc-pauth.s | 134 +++ 7 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 lld/test/ELF/aarch64-tlsdesc-pauth.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9571e0e9566fc3..68c9c5e20e0f94 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -157,9 +157,14 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, return RE_AARCH64_AUTH; case R_AARCH64_TLSDESC_ADR_PAGE21: return RE_AARCH64_TLSDESC_PAGE; + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: +return RE_AARCH64_AUTH_TLSDESC_PAGE; case R_AARCH64_TLSDESC_LD64_LO12: case R_AARCH64_TLSDESC_ADD_LO12: return R_TLSDESC; + case R_AARCH64_AUTH_TLSDESC_LD64_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: +return RE_AARCH64_AUTH_TLSDESC; case R_AARCH64_TLSDESC_CALL: return R_TLSDESC_CALL; case R_AARCH64_TLSLE_ADD_TPREL_HI12: @@ -542,6 +547,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: checkInt(ctx, loc, val, 33, rel); [[fallthrough]]; case R_AARCH64_ADR_PREL_PG_HI21_NC: @@ -592,6 +598,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: case 
R_AARCH64_TLSDESC_LD64_LO12: + case R_AARCH64_AUTH_TLSDESC_LD64_LO12: checkAlignment(ctx, loc, val, 8, rel); write32Imm12(loc, getBits(val, 3, 11)); break; @@ -666,6 +673,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: case R_AARCH64_TLSDESC_ADD_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: write32Imm12(loc, val); break; case R_AARCH64_TLSDESC: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 26dc5c606f57f9..8b9f687b34f308 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -965,12 +965,14 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_SIZE: return r.sym->getSize() + a; case R_TLSDESC: + case RelExpr::R_AARCH64_AUTH_TLSDESC: return ctx.in.got->getTlsDescAddr(*r.sym) + a; case R_TLSDESC_PC: return ctx.in.got->getTlsDescAddr(*r.sym) + a - p; case R_TLSDESC_GOTPLT: return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA(); case RE_AARCH64_TLSDESC_PAGE: + case RE_AARCH64_AUTH_TLSDESC_PAGE: return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) - getAArch64Page(p); case RE_LOONGARCH_TLSDESC_PAGE_PC: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 07cbdb7806fde1..088f2e2298d1fa 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1326,6 +1326,36 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } + auto fatalBothAuthAndNonAuth = [&sym]() { +fatal("both AUTH and non-AUTH TLSDESC entries for '" + sym.getName() + + "' requested, but only one type of TLSDESC entry per symbol is " + "supported"); + }; + + // Do not optimize signed TLSDESC as described in pauthabielf64 to LE/IE. + // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions + // > PAUTHELF64 only supports the descriptor based TLS (TLSDESC). 
+ if (oneof( + expr)) { +assert(ctx.arg.emachine == EM_AARCH64); +if (!sym.hasFlag(NEEDS_TLSDESC)) + sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH); +else if (!sym.hasFlag(NEEDS_TLSDESC_AUTH)) + fatalBothAuthAndNonAuth(); +sec->addReloc({expr, type, offset, addend, &sym}); +return 1; + } + + if (sym.hasFlag(NEEDS_TLSDESC_AUTH)) { +assert(ctx.arg.emachine == EM_AARCH64); +// TLSDESC_CALL hint relocation probably should not be emitted by compiler +// with signed TLSDESC enabled since it does not give any value, but leave a +// check against that just in case someone uses it. +if (expr != R_TLSDESC_CALL) + fatalBothAuthAndNonAuth(); +return 1; + } + bool isRISCV = ctx.arg.emachine
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
https://github.com/kovdan01 updated https://github.com/llvm/llvm-project/pull/113816 >From ec3a34c2cb55f3179739c2c068e1c3d1c1de5b9a Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Fri, 25 Oct 2024 21:28:18 +0300 Subject: [PATCH 01/10] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19` GOT-generating relocations. --- lld/ELF/Arch/AArch64.cpp | 5 ++ lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp | 11 +-- lld/ELF/Relocations.h| 1 + lld/test/ELF/aarch64-got-relocations-pauth.s | 73 5 files changed, 87 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9571e0e9566fc3..b63551d0f682e5 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_AUTH_LD64_GOT_LO12_NC: case R_AARCH64_AUTH_GOT_ADD_LO12_NC: return RE_AARCH64_AUTH_GOT; + case R_AARCH64_AUTH_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: +return RE_AARCH64_AUTH_GOT_PC; case R_AARCH64_LD64_GOTPAGE_LO15: return RE_AARCH64_GOT_PAGE; case R_AARCH64_ADR_GOT_PAGE: @@ -548,6 +551,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: checkInt(ctx, loc, val, 21, rel); write32AArch64Addr(loc, val); break; @@ -568,6 +572,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: case R_AARCH64_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_LD_PREL19: checkAlignment(ctx, loc, val, 4, rel); checkInt(ctx, loc, val, 21, rel); writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 26dc5c606f57f9..76af4ec4193a4f 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -817,6 +817,7 @@ uint64_t 
InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case RE_AARCH64_GOT_PAGE: return r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA()); case R_GOT_PC: + case R_AARCH64_AUTH_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return r.sym->getGotVA(ctx) + a - p; case R_GOTPLT_GOTREL: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 07cbdb7806fde1..690b4933bb809e 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -197,8 +197,9 @@ static bool needsPlt(RelExpr expr) { } bool lld::elf::needsGot(RelExpr expr) { - return oneof( expr); @@ -981,7 +982,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, RE_PPC32_PLTREL, RE_PPC64_CALL_PLT, RE_PPC64_RELAX_TOC, RE_RISCV_ADD, RE_AARCH64_GOT_PAGE, -RE_AARCH64_AUTH_GOT, RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT, +RE_AARCH64_AUTH_GOT, RE_AARCH64_AUTH_GOT_PC, +RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT, RE_LOONGARCH_GOT_PAGE_PC>(e)) return true; @@ -1096,7 +1098,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, } else if (!sym.isTls() || ctx.arg.emachine != EM_LOONGARCH) { // Many LoongArch TLS relocs reuse the RE_LOONGARCH_GOT type, in which // case the NEEDS_GOT flag shouldn't get set. 
- if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC) + if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC || + expr == RE_AARCH64_AUTH_GOT_PAGE_PC) sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH); else sym.setFlags(NEEDS_GOT | NEEDS_GOT_NONAUTH); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index d993ab77adc3cc..fde25a230b72e6 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -95,6 +95,7 @@ enum RelExpr { RE_AARCH64_AUTH_GOT_PAGE_PC, RE_AARCH64_GOT_PAGE, RE_AARCH64_AUTH_GOT, + RE_AARCH64_AUTH_GOT_PC, RE_AARCH64_PAGE_PC, RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, RE_AARCH64_TLSDESC_PAGE, diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s index 985ab302259ea3..ef871d2af93671 100644 --- a/lld/test/ELF/aarch64-got-relocations-pauth.s +++ b/lld/test/ELF/aarch64-got-relocations-pauth.s @@ -77,6 +77,79 @@ _start: adrp x1, :got_auth:zed add x1, x1, :got_auth_lo12:zed +#--- ok-tiny.s + +# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux ok-tiny.s -o ok-tiny.o + +# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny +# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY + +# RUN:
[llvm-branch-commits] [lld] release/19.x: [lld][WebAssembly] Fix use of uninitialized stack data with --wasm64 (#107780) (PR #119723)
adambratschikaye wrote: @dschuff @nikic : Is there something that needs to be done to bump the LLVM version? https://github.com/llvm/llvm-project/pull/119723 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
https://github.com/kovdan01 edited https://github.com/llvm/llvm-project/pull/113817 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)
https://github.com/kovdan01 updated https://github.com/llvm/llvm-project/pull/113817 >From e903e06eb6cfccfd65964ada48081e2ae1b2befd Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Fri, 25 Oct 2024 12:32:27 +0300 Subject: [PATCH 01/10] [PAC][lld][AArch64][ELF] Support signed TLSDESC Support `R_AARCH64_AUTH_TLSDESC_ADR_PAGE21`, `R_AARCH64_AUTH_TLSDESC_LD64_LO12` and `R_AARCH64_AUTH_TLSDESC_ADD_LO12` static TLSDESC relocations. --- lld/ELF/Arch/AArch64.cpp | 8 ++ lld/ELF/InputSection.cpp | 2 + lld/ELF/Relocations.cpp | 38 +++- lld/ELF/Relocations.h| 2 + lld/ELF/Symbols.h| 1 + lld/ELF/SyntheticSections.cpp| 5 + lld/test/ELF/aarch64-tlsdesc-pauth.s | 134 +++ 7 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 lld/test/ELF/aarch64-tlsdesc-pauth.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9571e0e9566fc3..68c9c5e20e0f94 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -157,9 +157,14 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, return RE_AARCH64_AUTH; case R_AARCH64_TLSDESC_ADR_PAGE21: return RE_AARCH64_TLSDESC_PAGE; + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: +return RE_AARCH64_AUTH_TLSDESC_PAGE; case R_AARCH64_TLSDESC_LD64_LO12: case R_AARCH64_TLSDESC_ADD_LO12: return R_TLSDESC; + case R_AARCH64_AUTH_TLSDESC_LD64_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: +return RE_AARCH64_AUTH_TLSDESC; case R_AARCH64_TLSDESC_CALL: return R_TLSDESC_CALL; case R_AARCH64_TLSLE_ADD_TPREL_HI12: @@ -542,6 +547,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: + case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21: checkInt(ctx, loc, val, 33, rel); [[fallthrough]]; case R_AARCH64_ADR_PREL_PG_HI21_NC: @@ -592,6 +598,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: case 
R_AARCH64_TLSDESC_LD64_LO12: + case R_AARCH64_AUTH_TLSDESC_LD64_LO12: checkAlignment(ctx, loc, val, 8, rel); write32Imm12(loc, getBits(val, 3, 11)); break; @@ -666,6 +673,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: case R_AARCH64_TLSDESC_ADD_LO12: + case R_AARCH64_AUTH_TLSDESC_ADD_LO12: write32Imm12(loc, val); break; case R_AARCH64_TLSDESC: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 26dc5c606f57f9..8b9f687b34f308 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -965,12 +965,14 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case R_SIZE: return r.sym->getSize() + a; case R_TLSDESC: + case RelExpr::R_AARCH64_AUTH_TLSDESC: return ctx.in.got->getTlsDescAddr(*r.sym) + a; case R_TLSDESC_PC: return ctx.in.got->getTlsDescAddr(*r.sym) + a - p; case R_TLSDESC_GOTPLT: return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA(); case RE_AARCH64_TLSDESC_PAGE: + case RE_AARCH64_AUTH_TLSDESC_PAGE: return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) - getAArch64Page(p); case RE_LOONGARCH_TLSDESC_PAGE_PC: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 07cbdb7806fde1..088f2e2298d1fa 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1326,6 +1326,36 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type, return 1; } + auto fatalBothAuthAndNonAuth = [&sym]() { +fatal("both AUTH and non-AUTH TLSDESC entries for '" + sym.getName() + + "' requested, but only one type of TLSDESC entry per symbol is " + "supported"); + }; + + // Do not optimize signed TLSDESC as described in pauthabielf64 to LE/IE. + // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions + // > PAUTHELF64 only supports the descriptor based TLS (TLSDESC). 
+ if (oneof( + expr)) { +assert(ctx.arg.emachine == EM_AARCH64); +if (!sym.hasFlag(NEEDS_TLSDESC)) + sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH); +else if (!sym.hasFlag(NEEDS_TLSDESC_AUTH)) + fatalBothAuthAndNonAuth(); +sec->addReloc({expr, type, offset, addend, &sym}); +return 1; + } + + if (sym.hasFlag(NEEDS_TLSDESC_AUTH)) { +assert(ctx.arg.emachine == EM_AARCH64); +// TLSDESC_CALL hint relocation probably should not be emitted by compiler +// with signed TLSDESC enabled since it does not give any value, but leave a +// check against that just in case someone uses it. +if (expr != R_TLSDESC_CALL) + fatalBothAuthAndNonAuth(); +return 1; + } + bool isRISCV = ctx.arg.emachine
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
https://github.com/kovdan01 updated https://github.com/llvm/llvm-project/pull/113816 >From de7feb154caec76b1ddf705689d1e7e8b4b2c491 Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Fri, 25 Oct 2024 21:28:18 +0300 Subject: [PATCH 1/9] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19` GOT-generating relocations. --- lld/ELF/Arch/AArch64.cpp | 5 ++ lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp | 11 +-- lld/ELF/Relocations.h| 1 + lld/test/ELF/aarch64-got-relocations-pauth.s | 73 5 files changed, 87 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9571e0e9566fc3..b63551d0f682e5 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_AUTH_LD64_GOT_LO12_NC: case R_AARCH64_AUTH_GOT_ADD_LO12_NC: return RE_AARCH64_AUTH_GOT; + case R_AARCH64_AUTH_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: +return RE_AARCH64_AUTH_GOT_PC; case R_AARCH64_LD64_GOTPAGE_LO15: return RE_AARCH64_GOT_PAGE; case R_AARCH64_ADR_GOT_PAGE: @@ -548,6 +551,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: + case R_AARCH64_AUTH_GOT_ADR_PREL_LO21: checkInt(ctx, loc, val, 21, rel); write32AArch64Addr(loc, val); break; @@ -568,6 +572,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: case R_AARCH64_GOT_LD_PREL19: + case R_AARCH64_AUTH_GOT_LD_PREL19: checkAlignment(ctx, loc, val, 4, rel); checkInt(ctx, loc, val, 21, rel); writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 26dc5c606f57f9..76af4ec4193a4f 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -817,6 +817,7 @@ uint64_t 
InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r, case RE_AARCH64_GOT_PAGE: return r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA()); case R_GOT_PC: + case R_AARCH64_AUTH_GOT_PC: case R_RELAX_TLS_GD_TO_IE: return r.sym->getGotVA(ctx) + a - p; case R_GOTPLT_GOTREL: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 07cbdb7806fde1..690b4933bb809e 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -197,8 +197,9 @@ static bool needsPlt(RelExpr expr) { } bool lld::elf::needsGot(RelExpr expr) { - return oneof( expr); @@ -981,7 +982,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT, R_GOTPLT_GOTREL, R_GOTPLT_PC, RE_PPC32_PLTREL, RE_PPC64_CALL_PLT, RE_PPC64_RELAX_TOC, RE_RISCV_ADD, RE_AARCH64_GOT_PAGE, -RE_AARCH64_AUTH_GOT, RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT, +RE_AARCH64_AUTH_GOT, RE_AARCH64_AUTH_GOT_PC, +RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT, RE_LOONGARCH_GOT_PAGE_PC>(e)) return true; @@ -1096,7 +1098,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, } else if (!sym.isTls() || ctx.arg.emachine != EM_LOONGARCH) { // Many LoongArch TLS relocs reuse the RE_LOONGARCH_GOT type, in which // case the NEEDS_GOT flag shouldn't get set. 
- if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC) + if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC || + expr == RE_AARCH64_AUTH_GOT_PAGE_PC) sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH); else sym.setFlags(NEEDS_GOT | NEEDS_GOT_NONAUTH); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index d993ab77adc3cc..fde25a230b72e6 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -95,6 +95,7 @@ enum RelExpr { RE_AARCH64_AUTH_GOT_PAGE_PC, RE_AARCH64_GOT_PAGE, RE_AARCH64_AUTH_GOT, + RE_AARCH64_AUTH_GOT_PC, RE_AARCH64_PAGE_PC, RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, RE_AARCH64_TLSDESC_PAGE, diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s index a577e81ad0d035..f4db44e0bfb24e 100644 --- a/lld/test/ELF/aarch64-got-relocations-pauth.s +++ b/lld/test/ELF/aarch64-got-relocations-pauth.s @@ -77,6 +77,79 @@ _start: adrp x1, :got_auth:zed add x1, x1, :got_auth_lo12:zed +#--- ok-tiny.s + +# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux ok-tiny.s -o ok-tiny.o + +# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny +# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY + +# RUN: l
[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)
@@ -77,6 +77,60 @@ _start: adrp x1, :got_auth:zed add x1, x1, :got_auth_lo12:zed +#--- ok-tiny.s +# RUN: llvm-mc -filetype=obj -triple=aarch64 ok-tiny.s -o ok-tiny.o + +# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny +# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY kovdan01 wrote: Thanks for the suggestion, fixed in 7d949458a8c0ce85e52f558159298090fa3bf529, and also applied the same to #113815 https://github.com/llvm/llvm-project/pull/113816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From bf0d13553b2bc2124a266e398976ba80a1114580 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 14 Dec 2024 16:34:47 +0100 Subject: [PATCH 1/4] [mlir][Vector] Move mask materialization patterns to greedy rewrite The mask materialization patterns during `VectorToLLVM` are rewrite patterns. They should run as part of the greedy pattern rewrite and not the dialect conversion. (Rewrite patterns and conversion patterns are not generally compatible.) The current combination of rewrite patterns and conversion patterns triggered an edge case when merging the 1:1 and 1:N dialect conversions. --- .../VectorToLLVM/ConvertVectorToLLVMPass.cpp | 7 +- .../VectorToLLVM/vector-mask-to-llvm.mlir | 4 +- .../VectorToLLVM/vector-to-llvm.mlir | 4 +- .../VectorToLLVM/vector-xfer-to-llvm.mlir | 80 +-- 4 files changed, 44 insertions(+), 51 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index 4623b9667998cc..64a9ad8e9bade0 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -61,8 +61,8 @@ struct ConvertVectorToLLVMPass } // namespace void ConvertVectorToLLVMPass::runOnOperation() { - // Perform progressive lowering of operations on slices and - // all contraction operations. Also applies folding and DCE. + // Perform progressive lowering of operations on slices and all contraction + // operations. Also materializes masks, applies folding and DCE. { RewritePatternSet patterns(&getContext()); populateVectorToVectorCanonicalizationPatterns(patterns); @@ -76,6 +76,8 @@ void ConvertVectorToLLVMPass::runOnOperation() { VectorTransformsOptions()); // Vector transfer ops with rank > 1 should be lowered with VectorToSCF. 
populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1); +populateVectorMaskMaterializationPatterns(patterns, + force32BitVectorIndices); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } @@ -83,7 +85,6 @@ void ConvertVectorToLLVMPass::runOnOperation() { LowerToLLVMOptions options(&getContext()); LLVMTypeConverter converter(&getContext(), options); RewritePatternSet patterns(&getContext()); - populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices); populateVectorTransferLoweringPatterns(patterns); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); populateVectorToLLVMConversionPatterns( diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir index 82351eb7c98a43..91e5358622b69d 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -7,7 +7,7 @@ // CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32 // CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32> // CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32> -// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi32> +// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32> // CMP32: return %[[T4]] : vector<11xi1> // CMP64-LABEL: @genbool_var_1d( @@ -16,7 +16,7 @@ // CMP64: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i64 // CMP64: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi64> // CMP64: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi64> -// CMP64: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi64> +// CMP64: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi64> // CMP64: return %[[T4]] : vector<11xi1> func.func @genbool_var_1d(%arg0: index) -> 
vector<11xi1> { diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 2473fe933ffcb2..ea88fece9e662d 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -3097,7 +3097,7 @@ func.func @create_mask_0d(%num_elems : index) -> vector { // CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32 // CHECK: %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]] // CHECK: %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector -// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector +// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector // CHECK: return %[[RESULT]] : vector
[llvm-branch-commits] [lld] 42e6efd - Revert "[LLD][COFF] Introduce hybrid symbol table for EC input files on ARM64…"
Author: Jacek Caban Date: 2024-12-15T22:30:10+01:00 New Revision: 42e6efd5b07177f5a7f529b36022c98724e2df40 URL: https://github.com/llvm/llvm-project/commit/42e6efd5b07177f5a7f529b36022c98724e2df40 DIFF: https://github.com/llvm/llvm-project/commit/42e6efd5b07177f5a7f529b36022c98724e2df40.diff LOG: Revert "[LLD][COFF] Introduce hybrid symbol table for EC input files on ARM64…" This reverts commit a8206e7b37929f4754806667680ffba0206eef95. Added: Modified: lld/COFF/COFFLinkerContext.h lld/COFF/Driver.cpp lld/COFF/InputFiles.cpp lld/COFF/InputFiles.h lld/COFF/SymbolTable.h lld/test/COFF/arm64ec-codemap.test lld/test/COFF/arm64ec-entry-thunk.s lld/test/COFF/arm64ec-lib.test lld/test/COFF/arm64ec-range-thunks.s Removed: lld/test/COFF/arm64x-symtab.s diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h index bdd625b8c3916b..5d89e97a7f7761 100644 --- a/lld/COFF/COFFLinkerContext.h +++ b/lld/COFF/COFFLinkerContext.h @@ -32,27 +32,6 @@ class COFFLinkerContext : public CommonLinkerContext { SymbolTable symtab; COFFOptTable optTable; - // A hybrid ARM64EC symbol table on ARM64X target. - std::optional hybridSymtab; - - // Pointer to the ARM64EC symbol table: either symtab for an ARM64EC target or - // hybridSymtab for an ARM64X target. - SymbolTable *symtabEC = nullptr; - - // Returns the appropriate symbol table for the specified machine type. - SymbolTable &getSymtab(llvm::COFF::MachineTypes machine) { -if (hybridSymtab && (machine == ARM64EC || machine == AMD64)) - return *hybridSymtab; -return symtab; - } - - // Invoke the specified callback for each symbol table. 
- void forEachSymtab(std::function f) { -f(symtab); -if (hybridSymtab) - f(*hybridSymtab); - } - std::vector objFileInstances; std::map pdbInputFileInstances; std::vector importFileInstances; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 64be0413f86ea1..0705f1c1be9992 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -596,17 +596,7 @@ void LinkerDriver::setMachine(MachineTypes machine) { assert(machine != IMAGE_FILE_MACHINE_UNKNOWN); ctx.config.machine = machine; - - if (machine != ARM64X) { -ctx.symtab.machine = machine; -if (machine == ARM64EC) - ctx.symtabEC = &ctx.symtab; - } else { -ctx.symtab.machine = ARM64; -ctx.hybridSymtab.emplace(ctx, ARM64EC); -ctx.symtabEC = &*ctx.hybridSymtab; - } - + ctx.symtab.machine = machine; addWinSysRootLibSearchPaths(); } @@ -2529,56 +2519,54 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (config->imageBase == uint64_t(-1)) config->imageBase = getDefaultImageBase(); - ctx.forEachSymtab([&](SymbolTable &symtab) { -symtab.addSynthetic(mangle("__ImageBase"), nullptr); -if (symtab.machine == I386) { - symtab.addAbsolute("___safe_se_handler_table", 0); - symtab.addAbsolute("___safe_se_handler_count", 0); -} - -symtab.addAbsolute(mangle("__guard_fids_count"), 0); -symtab.addAbsolute(mangle("__guard_fids_table"), 0); -symtab.addAbsolute(mangle("__guard_flags"), 0); -symtab.addAbsolute(mangle("__guard_iat_count"), 0); -symtab.addAbsolute(mangle("__guard_iat_table"), 0); -symtab.addAbsolute(mangle("__guard_longjmp_count"), 0); -symtab.addAbsolute(mangle("__guard_longjmp_table"), 0); -// Needed for MSVC 2017 15.5 CRT. -symtab.addAbsolute(mangle("__enclave_config"), 0); -// Needed for MSVC 2019 16.8 CRT. 
-symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0); -symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0); - -if (isArm64EC(ctx.config.machine)) { - symtab.addAbsolute("__arm64x_extra_rfe_table", 0); - symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0); - symtab.addAbsolute("__arm64x_redirection_metadata", 0); - symtab.addAbsolute("__arm64x_redirection_metadata_count", 0); - symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0); - symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0); - symtab.addAbsolute("__hybrid_auxiliary_iat", 0); - symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0); - symtab.addAbsolute("__hybrid_code_map", 0); - symtab.addAbsolute("__hybrid_code_map_count", 0); - symtab.addAbsolute("__hybrid_image_info_bitfield", 0); - symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0); - symtab.addAbsolute("__x64_code_ranges_to_entry_points_count", 0); - symtab.addSynthetic("__guard_check_icall_a64n_fptr", nullptr); - symtab.addSynthetic("__arm64x_native_entrypoint", nullptr); -} - -if (config->pseudoRelocs) { - symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); - symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); -} -if (config->mingw) { - symtab.addAbsolute(mangle("__CTOR_LIS
[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From bc93c7840aa0d9d361f6f7aab08ec59b786bab2a Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sun, 15 Dec 2024 17:36:49 +0100 Subject: [PATCH] ex --- .../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 1 - mlir/docs/DialectConversion.md| 35 +- .../mlir/Transforms/DialectConversion.h | 18 +- .../Conversion/LLVMCommon/TypeConverter.cpp | 14 +- .../EmitC/Transforms/TypeConversions.cpp | 1 - .../Dialect/Linalg/Transforms/Detensorize.cpp | 1 - .../Quant/Transforms/StripFuncQuantTypes.cpp | 1 - .../Utils/SparseTensorDescriptor.cpp | 3 - .../Vector/Transforms/VectorLinearize.cpp | 1 - .../Transforms/Utils/DialectConversion.cpp| 432 +- mlir/test/Transforms/test-legalizer.mlir | 7 +- .../Func/TestDecomposeCallGraphTypes.cpp | 2 +- mlir/test/lib/Dialect/Test/TestPatterns.cpp | 1 - .../lib/Transforms/TestDialectConversion.cpp | 1 - 14 files changed, 227 insertions(+), 291 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 1bb91d252529f0..104ae7408b80c1 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -172,7 +172,6 @@ class BoxprocTypeRewriter : public mlir::TypeConverter { addConversion([&](TypeDescType ty) { return TypeDescType::get(convertType(ty.getOfTy())); }); -addArgumentMaterialization(materializeProcedure); addSourceMaterialization(materializeProcedure); addTargetMaterialization(materializeProcedure); } diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index 3168f5e13c7515..abacd5a82c61eb 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -242,19 +242,6 @@ cannot. These materializations are used by the conversion framework to ensure type safety during the conversion process. There are several types of materializations depending on the situation. 
-* Argument Materialization - -- An argument materialization is used when converting the type of a block -argument during a [signature conversion](#region-signature-conversion). -The new block argument types are specified in a `SignatureConversion` -object. An original block argument can be converted into multiple -block arguments, which is not supported everywhere in the dialect -conversion. (E.g., adaptors support only a single replacement value for -each original value.) Therefore, an argument materialization is used to -convert potentially multiple new block arguments back into a single SSA -value. An argument materialization is also used when replacing an op -result with multiple values. - * Source Materialization - A source materialization is used when a value was replaced with a value @@ -343,17 +330,6 @@ class TypeConverter { /// Materialization functions must be provided when a type conversion may /// persist after the conversion has finished. - /// This method registers a materialization that will be called when - /// converting (potentially multiple) block arguments that were the result of - /// a signature conversion of a single block argument, to a single SSA value - /// with the old argument type. - template ::template arg_t<1>> - void addArgumentMaterialization(FnT &&callback) { -argumentMaterializations.emplace_back( -wrapMaterialization(std::forward(callback))); - } - /// This method registers a materialization that will be called when /// converting a replacement value back to its original source type. /// This is used when some uses of the original value persist beyond the main @@ -406,12 +382,11 @@ done explicitly via a conversion pattern. To convert the types of block arguments within a Region, a custom hook on the `ConversionPatternRewriter` must be invoked; `convertRegionTypes`. This hook uses a provided type converter to apply type conversions to all blocks of a -given region. 
As noted above, the conversions performed by this method use the -argument materialization hook on the `TypeConverter`. This hook also takes an -optional `TypeConverter::SignatureConversion` parameter that applies a custom -conversion to the entry block of the region. The types of the entry block -arguments are often tied semantically to the operation, e.g., -`func::FuncOp`, `AffineForOp`, etc. +given region. This hook also takes an optional +`TypeConverter::SignatureConversion` parameter that applies a custom conversion +to the entry block of the region. The types of the entry block arguments are +often tied semantically to the operation, e.g., `func::FuncOp`, `AffineForOp`, +etc. To convert the signature of just one given block, the `applySignatureConversion` hook can be used. diff -
[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From bf0d13553b2bc2124a266e398976ba80a1114580 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 14 Dec 2024 16:34:47 +0100 Subject: [PATCH 1/4] [mlir][Vector] Move mask materialization patterns to greedy rewrite The mask materialization patterns during `VectorToLLVM` are rewrite patterns. They should run as part of the greedy pattern rewrite and not the dialect conversion. (Rewrite patterns and conversion patterns are not generally compatible.) The current combination of rewrite patterns and conversion patterns triggered an edge case when merging the 1:1 and 1:N dialect conversions. --- .../VectorToLLVM/ConvertVectorToLLVMPass.cpp | 7 +- .../VectorToLLVM/vector-mask-to-llvm.mlir | 4 +- .../VectorToLLVM/vector-to-llvm.mlir | 4 +- .../VectorToLLVM/vector-xfer-to-llvm.mlir | 80 +-- 4 files changed, 44 insertions(+), 51 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index 4623b9667998cc..64a9ad8e9bade0 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -61,8 +61,8 @@ struct ConvertVectorToLLVMPass } // namespace void ConvertVectorToLLVMPass::runOnOperation() { - // Perform progressive lowering of operations on slices and - // all contraction operations. Also applies folding and DCE. + // Perform progressive lowering of operations on slices and all contraction + // operations. Also materializes masks, applies folding and DCE. { RewritePatternSet patterns(&getContext()); populateVectorToVectorCanonicalizationPatterns(patterns); @@ -76,6 +76,8 @@ void ConvertVectorToLLVMPass::runOnOperation() { VectorTransformsOptions()); // Vector transfer ops with rank > 1 should be lowered with VectorToSCF. 
populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1); +populateVectorMaskMaterializationPatterns(patterns, + force32BitVectorIndices); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } @@ -83,7 +85,6 @@ void ConvertVectorToLLVMPass::runOnOperation() { LowerToLLVMOptions options(&getContext()); LLVMTypeConverter converter(&getContext(), options); RewritePatternSet patterns(&getContext()); - populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices); populateVectorTransferLoweringPatterns(patterns); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); populateVectorToLLVMConversionPatterns( diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir index 82351eb7c98a43..91e5358622b69d 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -7,7 +7,7 @@ // CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32 // CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32> // CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32> -// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi32> +// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32> // CMP32: return %[[T4]] : vector<11xi1> // CMP64-LABEL: @genbool_var_1d( @@ -16,7 +16,7 @@ // CMP64: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i64 // CMP64: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi64> // CMP64: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi64> -// CMP64: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi64> +// CMP64: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi64> // CMP64: return %[[T4]] : vector<11xi1> func.func @genbool_var_1d(%arg0: index) -> 
vector<11xi1> { diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 2473fe933ffcb2..ea88fece9e662d 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -3097,7 +3097,7 @@ func.func @create_mask_0d(%num_elems : index) -> vector { // CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32 // CHECK: %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]] // CHECK: %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector -// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector +// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector // CHECK: return %[[RESULT]] : vector
[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect Conversion: No target mat. for 1:N replacement (PR #117513)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/117513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From eff9c47de3405dc542644d5d64e5a26f793214d0 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sun, 15 Dec 2024 17:36:49 +0100 Subject: [PATCH] ex --- .../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 1 - mlir/docs/DialectConversion.md| 35 +- .../mlir/Transforms/DialectConversion.h | 18 +- .../Conversion/LLVMCommon/TypeConverter.cpp | 14 +- .../EmitC/Transforms/TypeConversions.cpp | 1 - .../Dialect/Linalg/Transforms/Detensorize.cpp | 1 - .../Quant/Transforms/StripFuncQuantTypes.cpp | 1 - .../Utils/SparseTensorDescriptor.cpp | 3 - .../Vector/Transforms/VectorLinearize.cpp | 1 - .../Transforms/Utils/DialectConversion.cpp| 357 -- mlir/test/Transforms/test-legalizer.mlir | 7 +- .../Func/TestDecomposeCallGraphTypes.cpp | 2 +- mlir/test/lib/Dialect/Test/TestPatterns.cpp | 1 - .../lib/Transforms/TestDialectConversion.cpp | 1 - 14 files changed, 168 insertions(+), 275 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 1bb91d252529f0..104ae7408b80c1 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -172,7 +172,6 @@ class BoxprocTypeRewriter : public mlir::TypeConverter { addConversion([&](TypeDescType ty) { return TypeDescType::get(convertType(ty.getOfTy())); }); -addArgumentMaterialization(materializeProcedure); addSourceMaterialization(materializeProcedure); addTargetMaterialization(materializeProcedure); } diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md index 3168f5e13c7515..abacd5a82c61eb 100644 --- a/mlir/docs/DialectConversion.md +++ b/mlir/docs/DialectConversion.md @@ -242,19 +242,6 @@ cannot. These materializations are used by the conversion framework to ensure type safety during the conversion process. There are several types of materializations depending on the situation. 
-* Argument Materialization - -- An argument materialization is used when converting the type of a block -argument during a [signature conversion](#region-signature-conversion). -The new block argument types are specified in a `SignatureConversion` -object. An original block argument can be converted into multiple -block arguments, which is not supported everywhere in the dialect -conversion. (E.g., adaptors support only a single replacement value for -each original value.) Therefore, an argument materialization is used to -convert potentially multiple new block arguments back into a single SSA -value. An argument materialization is also used when replacing an op -result with multiple values. - * Source Materialization - A source materialization is used when a value was replaced with a value @@ -343,17 +330,6 @@ class TypeConverter { /// Materialization functions must be provided when a type conversion may /// persist after the conversion has finished. - /// This method registers a materialization that will be called when - /// converting (potentially multiple) block arguments that were the result of - /// a signature conversion of a single block argument, to a single SSA value - /// with the old argument type. - template ::template arg_t<1>> - void addArgumentMaterialization(FnT &&callback) { -argumentMaterializations.emplace_back( -wrapMaterialization(std::forward(callback))); - } - /// This method registers a materialization that will be called when /// converting a replacement value back to its original source type. /// This is used when some uses of the original value persist beyond the main @@ -406,12 +382,11 @@ done explicitly via a conversion pattern. To convert the types of block arguments within a Region, a custom hook on the `ConversionPatternRewriter` must be invoked; `convertRegionTypes`. This hook uses a provided type converter to apply type conversions to all blocks of a -given region. 
As noted above, the conversions performed by this method use the -argument materialization hook on the `TypeConverter`. This hook also takes an -optional `TypeConverter::SignatureConversion` parameter that applies a custom -conversion to the entry block of the region. The types of the entry block -arguments are often tied semantically to the operation, e.g., -`func::FuncOp`, `AffineForOp`, etc. +given region. This hook also takes an optional +`TypeConverter::SignatureConversion` parameter that applies a custom conversion +to the entry block of the region. The types of the entry block arguments are +often tied semantically to the operation, e.g., `func::FuncOp`, `AffineForOp`, +etc. To convert the signature of just one given block, the `applySignatureConversion` hook can be used. diff -
[llvm-branch-commits] [llvm] RegAlloc: Do not fatal error if there are no registers in the alloc order (PR #119640)
arsenm wrote: ### Merge activity * **Dec 15, 8:41 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/119640). https://github.com/llvm/llvm-project/pull/119640 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RegAlloc: Fix failure on undef use when all registers are reserved (PR #119647)
arsenm wrote: ### Merge activity * **Dec 15, 8:41 PM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/119647). https://github.com/llvm/llvm-project/pull/119647 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RegAlloc: Fix verifier error after failed allocation (PR #119690)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/119690 >From 2c3b6e52f9cb028c579f19379a59440a9dcbaba5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 25 Mar 2022 20:27:39 -0400 Subject: [PATCH] RegAlloc: Fix verifier error after failed allocation In some cases after reporting an allocation failure, this would fail the verifier. It picks the first allocatable register and assigns it, but didn't update the liveness appropriately. When VirtRegRewriter relied on the liveness to set kill flags, it would incorrectly add kill flags if there was another overlapping kill of the virtual register. We can't properly assign the register to an overlapping range, so break the liveness of the failing register (and any other interfering registers) instead. Give the virtual register dummy liveness by effectively deleting all the uses by setting them to undef. The edge case not tested here which I'm worried about is if the read of the register is a def of a subregister. I've been unable to come up with a test where this occurs. 
https://reviews.llvm.org/D122616 --- llvm/lib/CodeGen/RegAllocBase.cpp | 36 +++ llvm/lib/CodeGen/RegAllocBase.h | 6 ++ llvm/lib/CodeGen/RegAllocBasic.cpp| 1 + llvm/lib/CodeGen/RegAllocGreedy.cpp | 1 + .../AMDGPU/illegal-eviction-assert.mir| 4 +- llvm/test/CodeGen/AMDGPU/issue48473.mir | 3 +- ...ut-of-registers-error-all-regs-reserved.ll | 8 +-- ...lloc-failure-overlapping-insert-assert.mir | 7 +-- ...ster-killed-error-after-alloc-failure0.mir | 59 +++ ...ister-killed-error-after-alloc-failure1.ll | 30 ++ .../remaining-virtual-register-operands.ll| 3 +- llvm/test/CodeGen/X86/inline-asm-assertion.ll | 2 +- 12 files changed, 144 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 980a6756963d9f..bb0c8a32a7bc92 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis, Matrix = &mat; MRI->freezeReservedRegs(); RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + FailedVRegs.clear(); } // Visit all the live registers. If they are already assigned to a physical @@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() { // Keep going after reporting the error. 
VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg); + FailedVRegs.insert(VirtReg->reg()); } else if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); @@ -161,6 +163,40 @@ void RegAllocBase::postOptimization() { DeadRemats.clear(); } +void RegAllocBase::cleanupFailedVRegs() { + SmallSet JunkRegs; + + for (Register FailedReg : FailedVRegs) { +JunkRegs.insert(FailedReg); + +MCRegister PhysReg = VRM->getPhys(FailedReg); +LiveInterval &FailedInterval = LIS->getInterval(FailedReg); + +// The liveness information for the failed register and anything interfering +// with the physical register we arbitrarily chose is junk and needs to be +// deleted. +for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units); + for (const LiveInterval *InterferingReg : Q.interferingVRegs()) +JunkRegs.insert(InterferingReg->reg()); +} + } + + // TODO: Probably need to set undef on any physreg uses not associated with + // a virtual register. + for (Register JunkReg : JunkRegs) { +// We still should produce valid IR. Kill all the uses and reduce the live +// ranges so that we don't think it's possible to introduce kill flags +// later which will fail the verifier. 
+for (MachineOperand &MO : MRI->reg_operands(JunkReg)) { + if (MO.readsReg()) +MO.setIsUndef(true); +} + +LIS->shrinkToUses(&LIS->getInterval(JunkReg)); + } +} + void RegAllocBase::enqueue(const LiveInterval *LI) { const Register Reg = LI->reg(); diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 5bd52da61f2dc5..1fdbab694bb0e3 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -37,6 +37,7 @@ #define LLVM_LIB_CODEGEN_REGALLOCBASE_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocCommon.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -81,6 +82,7 @@ class RegAllocBase { /// always available for the remat of all the siblings of the original reg. SmallPtrSet DeadRemats; + SmallSet FailedVRegs; RegAllocBase(const RegAllocFilterFunc F = nullptr) : shouldAlloc
[llvm-branch-commits] [llvm] AMDGPU: Delete spills of undef values (PR #119684)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/119684 >From 69aa81bb9763c092605e9445f61657314f8c390d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 28 Mar 2022 11:24:48 -0400 Subject: [PATCH 1/2] AMDGPU: Delete spills of undef values It would be a bit more logical to preserve the undef and do the normal expansion, but this is less work. This avoids verifier errors in a future patch which starts deleting liveness from registers after allocation failures which results in spills of undef values. https://reviews.llvm.org/D122607 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 12 ++ .../AMDGPU/sgpr-spill-partially-undef.mir | 42 +++ .../AMDGPU/spill-agpr-partially-undef.mir | 34 +++ llvm/test/CodeGen/AMDGPU/vgpr-spill.mir | 34 +++ 4 files changed, 122 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2f5a99e5de5e3e..925d0d5eed2ef0 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1956,6 +1956,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, SlotIndexes *Indexes, LiveIntervals *LIS, bool OnlyToVGPR, bool SpillToPhysVGPRLane) const { + if (MI->getOperand(0).isUndef()) { +if (Indexes) + Indexes->removeMachineInstrFromMaps(*MI); +MI->eraseFromParent(); +return true; + } + SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); ArrayRef VGPRSpills = @@ -2377,6 +2384,11 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_WWM_AV32_SAVE: { const MachineOperand *VData = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); + if (VData->isUndef()) { +MI->eraseFromParent(); +return true; + } + assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == MFI->getStackPtrOffsetReg()); diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir 
b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir index 774785fb3966fc..d352e8a13da9f1 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir @@ -54,3 +54,45 @@ body: | SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ... + +--- +name: sgpr_spill_s32_undef +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + hasSpilledSGPRs: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +body: | + bb.0: +; CHECK-LABEL: name: sgpr_spill_s32_undef +; CHECK: body: +; CHECK-NEXT: bb.0: +; CHECK-NOT: {{.+}} +; CHECK: ... +SI_SPILL_S32_SAVE undef $sgpr8, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s32) into %stack.0, align 4, addrspace 5) + +... + +--- +name: sgpr_spill_s64_undef +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + hasSpilledSGPRs: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill } +body: | + bb.0: +; CHECK-LABEL: name: sgpr_spill_s64_undef +; CHECK: body: +; CHECK-NEXT: bb.0: +; CHECK-NOT: {{.+}} +; CHECK: ... +SI_SPILL_S64_SAVE undef $sgpr8_sgpr9, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir index c825674de7652c..b02b6e79d7a76f 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir @@ -71,3 +71,37 @@ body: | ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5) SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) ... + +--- +name: spill_a32_undef +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: +; CHECK-LABEL: na
[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/116524 >From bf0d13553b2bc2124a266e398976ba80a1114580 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Sat, 14 Dec 2024 16:34:47 +0100 Subject: [PATCH 1/4] [mlir][Vector] Move mask materialization patterns to greedy rewrite The mask materialization patterns during `VectorToLLVM` are rewrite patterns. They should run as part of the greedy pattern rewrite and not the dialect conversion. (Rewrite patterns and conversion patterns are not generally compatible.) The current combination of rewrite patterns and conversion patterns triggered an edge case when merging the 1:1 and 1:N dialect conversions. --- .../VectorToLLVM/ConvertVectorToLLVMPass.cpp | 7 +- .../VectorToLLVM/vector-mask-to-llvm.mlir | 4 +- .../VectorToLLVM/vector-to-llvm.mlir | 4 +- .../VectorToLLVM/vector-xfer-to-llvm.mlir | 80 +-- 4 files changed, 44 insertions(+), 51 deletions(-) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp index 4623b9667998cc..64a9ad8e9bade0 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp @@ -61,8 +61,8 @@ struct ConvertVectorToLLVMPass } // namespace void ConvertVectorToLLVMPass::runOnOperation() { - // Perform progressive lowering of operations on slices and - // all contraction operations. Also applies folding and DCE. + // Perform progressive lowering of operations on slices and all contraction + // operations. Also materializes masks, applies folding and DCE. { RewritePatternSet patterns(&getContext()); populateVectorToVectorCanonicalizationPatterns(patterns); @@ -76,6 +76,8 @@ void ConvertVectorToLLVMPass::runOnOperation() { VectorTransformsOptions()); // Vector transfer ops with rank > 1 should be lowered with VectorToSCF. 
populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1); +populateVectorMaskMaterializationPatterns(patterns, + force32BitVectorIndices); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } @@ -83,7 +85,6 @@ void ConvertVectorToLLVMPass::runOnOperation() { LowerToLLVMOptions options(&getContext()); LLVMTypeConverter converter(&getContext(), options); RewritePatternSet patterns(&getContext()); - populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices); populateVectorTransferLoweringPatterns(patterns); populateVectorToLLVMMatrixConversionPatterns(converter, patterns); populateVectorToLLVMConversionPatterns( diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir index 82351eb7c98a43..91e5358622b69d 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir @@ -7,7 +7,7 @@ // CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32 // CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32> // CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32> -// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi32> +// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32> // CMP32: return %[[T4]] : vector<11xi1> // CMP64-LABEL: @genbool_var_1d( @@ -16,7 +16,7 @@ // CMP64: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i64 // CMP64: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi64> // CMP64: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi64> -// CMP64: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi64> +// CMP64: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi64> // CMP64: return %[[T4]] : vector<11xi1> func.func @genbool_var_1d(%arg0: index) -> 
vector<11xi1> { diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 2473fe933ffcb2..ea88fece9e662d 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -3097,7 +3097,7 @@ func.func @create_mask_0d(%num_elems : index) -> vector { // CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32 // CHECK: %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]] // CHECK: %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector -// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector +// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector // CHECK: return %[[RESULT]] : vector
[llvm-branch-commits] [clang] [llvm] AMDGPU: Fix libcall recognition of image array types (PR #119832)
@@ -622,9 +622,9 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param, if (isDigit(TC)) { res.ArgType = StringSwitch<AMDGPULibFunc::EType>(eatLengthPrefixedName(param)) -.Case("ocl_image1darray", AMDGPULibFunc::IMG1DA) -.Case("ocl_image1dbuffer", AMDGPULibFunc::IMG1DB) -.Case("ocl_image2darray", AMDGPULibFunc::IMG2DA) +.StartsWith("ocl_image1d_array", AMDGPULibFunc::IMG1DA) +.StartsWith("ocl_image1d_buffer", AMDGPULibFunc::IMG1DB) +.StartsWith("ocl_image2d_array", AMDGPULibFunc::IMG2DA) ssahasra wrote: Shouldn't this change also fix the mangling generated in `getItaniumTypeName`? https://github.com/llvm/llvm-project/pull/119832 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Fix libcall recognition of image array types (PR #119832)
https://github.com/ssahasra approved this pull request. https://github.com/llvm/llvm-project/pull/119832 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] AMDGPU: Fix libcall recognition of image array types (PR #119832)
arsenm wrote: ### Merge activity * **Dec 16, 12:57 AM EST**: A user started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/119832). https://github.com/llvm/llvm-project/pull/119832 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/119983 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/119983 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)
https://github.com/vitalybuka ready_for_review https://github.com/llvm/llvm-project/pull/119983 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: Vitaly Buka (vitalybuka) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/120038.diff 1 Files Affected: - (added) compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp (+28) ``diff diff --git a/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp new file mode 100644 index 00..92e9e62dbc8698 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp @@ -0,0 +1,28 @@ +// RUN: %clangxx -fsanitize=local-bounds %s -O3 -o %t && %run %t 1 +// RUN: %clangxx -fsanitize=local-bounds %s -O3 -o %t && not --crash %run %t 3 + +// FIXME: it's always trap for now. + +#include + +struct S { + int k; + int l; +}; + +__attribute__((noinline)) void init(S *s) { + __asm__ __volatile__("" : : "r"(s) : "memory"); +} + +__attribute__((noinline, no_sanitize("memory"))) int test(char i) { + S a; + init(&a); + S b; + init(&b); + return ((int*)(&a))[i]; +} + +int main(int argc, char **argv) { + test(argv[1][0] - '0'); + return 0; +} `` https://github.com/llvm/llvm-project/pull/120038 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/120038 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff b42a2ec4a07d94c6c0d73d4baedf2ffef3d3825c 1e33ed956935df967b130f30e8e7b701f18304d3 --extensions cpp -- compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp `` View the diff from clang-format here. ``diff diff --git a/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp index 92e9e62dbc..edfe439c92 100644 --- a/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp +++ b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp @@ -19,7 +19,7 @@ __attribute__((noinline, no_sanitize("memory"))) int test(char i) { init(&a); S b; init(&b); - return ((int*)(&a))[i]; + return ((int *)(&a))[i]; } int main(int argc, char **argv) { `` https://github.com/llvm/llvm-project/pull/120038 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Delete spills of undef values (PR #119684)
@@ -54,3 +54,45 @@ body: | SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) ... + +--- +name: sgpr_spill_s32_undef +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + hasSpilledSGPRs: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +body: | + bb.0: +; CHECK-LABEL: name: sgpr_spill_s32_undef +; CHECK: body: +; CHECK-NEXT: bb.0: +; CHECK-NOT: {{.+}} arsenm wrote: The -NEXT check is wrong because there is a blank line. I couldn't get a regex that matches a blank line to work, so I did this instead. Really, update_mir_test_checks should understand how to match an empty block; anything here is a hack. https://github.com/llvm/llvm-project/pull/119684 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits