[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)

2024-12-15 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/119983
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)

2024-12-15 Thread Thurston Dang via llvm-branch-commits

https://github.com/thurstond approved this pull request.


https://github.com/llvm/llvm-project/pull/120038
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)

2024-12-15 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 updated 
https://github.com/llvm/llvm-project/pull/113817

From f2dc47f188eb68bb53bb60d85d3d617bcf90d823 Mon Sep 17 00:00:00 2001
From: Daniil Kovalev 
Date: Fri, 25 Oct 2024 12:32:27 +0300
Subject: [PATCH 01/10] [PAC][lld][AArch64][ELF] Support signed TLSDESC

Support `R_AARCH64_AUTH_TLSDESC_ADR_PAGE21`, `R_AARCH64_AUTH_TLSDESC_LD64_LO12`
and `R_AARCH64_AUTH_TLSDESC_ADD_LO12` static TLSDESC relocations.
---
 lld/ELF/Arch/AArch64.cpp |   8 ++
 lld/ELF/InputSection.cpp |   2 +
 lld/ELF/Relocations.cpp  |  38 +++-
 lld/ELF/Relocations.h|   2 +
 lld/ELF/Symbols.h|   1 +
 lld/ELF/SyntheticSections.cpp|   5 +
 lld/test/ELF/aarch64-tlsdesc-pauth.s | 134 +++
 7 files changed, 188 insertions(+), 2 deletions(-)
 create mode 100644 lld/test/ELF/aarch64-tlsdesc-pauth.s

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9571e0e9566fc3..68c9c5e20e0f94 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -157,9 +157,14 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
 return RE_AARCH64_AUTH;
   case R_AARCH64_TLSDESC_ADR_PAGE21:
 return RE_AARCH64_TLSDESC_PAGE;
+  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
+return RE_AARCH64_AUTH_TLSDESC_PAGE;
   case R_AARCH64_TLSDESC_LD64_LO12:
   case R_AARCH64_TLSDESC_ADD_LO12:
 return R_TLSDESC;
+  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
+  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
+return RE_AARCH64_AUTH_TLSDESC;
   case R_AARCH64_TLSDESC_CALL:
 return R_TLSDESC_CALL;
   case R_AARCH64_TLSLE_ADD_TPREL_HI12:
@@ -542,6 +547,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_ADR_PREL_PG_HI21:
   case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
   case R_AARCH64_TLSDESC_ADR_PAGE21:
+  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
 checkInt(ctx, loc, val, 33, rel);
 [[fallthrough]];
   case R_AARCH64_ADR_PREL_PG_HI21_NC:
@@ -592,6 +598,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
   case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_LD64_LO12:
+  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
 checkAlignment(ctx, loc, val, 8, rel);
 write32Imm12(loc, getBits(val, 3, 11));
 break;
@@ -666,6 +673,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
 break;
   case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_ADD_LO12:
+  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
 write32Imm12(loc, val);
 break;
   case R_AARCH64_TLSDESC:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 26dc5c606f57f9..8b9f687b34f308 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -965,12 +965,14 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
   case R_SIZE:
 return r.sym->getSize() + a;
   case R_TLSDESC:
+  case RE_AARCH64_AUTH_TLSDESC:
 return ctx.in.got->getTlsDescAddr(*r.sym) + a;
   case R_TLSDESC_PC:
 return ctx.in.got->getTlsDescAddr(*r.sym) + a - p;
   case R_TLSDESC_GOTPLT:
 return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA();
   case RE_AARCH64_TLSDESC_PAGE:
+  case RE_AARCH64_AUTH_TLSDESC_PAGE:
 return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) -
getAArch64Page(p);
   case RE_LOONGARCH_TLSDESC_PAGE_PC:
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 07cbdb7806fde1..088f2e2298d1fa 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1326,6 +1326,36 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
 return 1;
   }
 
+  auto fatalBothAuthAndNonAuth = [&sym]() {
+fatal("both AUTH and non-AUTH TLSDESC entries for '" + sym.getName() +
+  "' requested, but only one type of TLSDESC entry per symbol is "
+  "supported");
+  };
+
+  // Do not optimize signed TLSDESC as described in pauthabielf64 to LE/IE.
+  // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions
+  // > PAUTHELF64 only supports the descriptor based TLS (TLSDESC).
+  if (oneof<RE_AARCH64_AUTH_TLSDESC_PAGE, RE_AARCH64_AUTH_TLSDESC>(expr)) {
+assert(ctx.arg.emachine == EM_AARCH64);
+if (!sym.hasFlag(NEEDS_TLSDESC))
+  sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
+else if (!sym.hasFlag(NEEDS_TLSDESC_AUTH))
+  fatalBothAuthAndNonAuth();
+sec->addReloc({expr, type, offset, addend, &sym});
+return 1;
+  }
+
+  if (sym.hasFlag(NEEDS_TLSDESC_AUTH)) {
+assert(ctx.arg.emachine == EM_AARCH64);
+// TLSDESC_CALL hint relocation probably should not be emitted by compiler
+// with signed TLSDESC enabled since it does not give any value, but leave a
+// check against that just in case someone uses it.
+if (expr != R_TLSDESC_CALL)
+  fatalBothAuthAndNonAuth();
+return 1;
+  }
+
   bool isRISCV = ctx.arg.emachine

[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)

2024-12-15 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 updated 
https://github.com/llvm/llvm-project/pull/113816

From ec3a34c2cb55f3179739c2c068e1c3d1c1de5b9a Mon Sep 17 00:00:00 2001
From: Daniil Kovalev 
Date: Fri, 25 Oct 2024 21:28:18 +0300
Subject: [PATCH 01/10] [PAC][lld][AArch64][ELF] Support signed GOT with tiny
 code model

Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19`
GOT-generating relocations.
---
 lld/ELF/Arch/AArch64.cpp |  5 ++
 lld/ELF/InputSection.cpp |  1 +
 lld/ELF/Relocations.cpp  | 11 +--
 lld/ELF/Relocations.h|  1 +
 lld/test/ELF/aarch64-got-relocations-pauth.s | 73 
 5 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9571e0e9566fc3..b63551d0f682e5 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
   case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
   case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
 return RE_AARCH64_AUTH_GOT;
+  case R_AARCH64_AUTH_GOT_LD_PREL19:
+  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
+return RE_AARCH64_AUTH_GOT_PC;
   case R_AARCH64_LD64_GOTPAGE_LO15:
 return RE_AARCH64_GOT_PAGE;
   case R_AARCH64_ADR_GOT_PAGE:
@@ -548,6 +551,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
 write32AArch64Addr(loc, val >> 12);
 break;
   case R_AARCH64_ADR_PREL_LO21:
+  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
 checkInt(ctx, loc, val, 21, rel);
 write32AArch64Addr(loc, val);
 break;
@@ -568,6 +572,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_CONDBR19:
   case R_AARCH64_LD_PREL_LO19:
   case R_AARCH64_GOT_LD_PREL19:
+  case R_AARCH64_AUTH_GOT_LD_PREL19:
 checkAlignment(ctx, loc, val, 4, rel);
 checkInt(ctx, loc, val, 21, rel);
 writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 26dc5c606f57f9..76af4ec4193a4f 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -817,6 +817,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
   case RE_AARCH64_GOT_PAGE:
 return r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA());
   case R_GOT_PC:
+  case RE_AARCH64_AUTH_GOT_PC:
   case R_RELAX_TLS_GD_TO_IE:
 return r.sym->getGotVA(ctx) + a - p;
   case R_GOTPLT_GOTREL:
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 07cbdb7806fde1..690b4933bb809e 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -197,8 +197,9 @@ static bool needsPlt(RelExpr expr) {
 }
 
 bool lld::elf::needsGot(RelExpr expr) {
-  return oneof(
   expr);
@@ -981,7 +982,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type,
 R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT,
 R_GOTPLT_GOTREL, R_GOTPLT_PC, RE_PPC32_PLTREL, RE_PPC64_CALL_PLT,
 RE_PPC64_RELAX_TOC, RE_RISCV_ADD, RE_AARCH64_GOT_PAGE,
-RE_AARCH64_AUTH_GOT, RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT,
+RE_AARCH64_AUTH_GOT, RE_AARCH64_AUTH_GOT_PC,
+RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT,
 RE_LOONGARCH_GOT_PAGE_PC>(e))
 return true;
 
@@ -1096,7 +1098,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
 } else if (!sym.isTls() || ctx.arg.emachine != EM_LOONGARCH) {
   // Many LoongArch TLS relocs reuse the RE_LOONGARCH_GOT type, in which
   // case the NEEDS_GOT flag shouldn't get set.
-  if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC)
+  if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC ||
+  expr == RE_AARCH64_AUTH_GOT_PC)
 sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
   else
 sym.setFlags(NEEDS_GOT | NEEDS_GOT_NONAUTH);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index d993ab77adc3cc..fde25a230b72e6 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -95,6 +95,7 @@ enum RelExpr {
   RE_AARCH64_AUTH_GOT_PAGE_PC,
   RE_AARCH64_GOT_PAGE,
   RE_AARCH64_AUTH_GOT,
+  RE_AARCH64_AUTH_GOT_PC,
   RE_AARCH64_PAGE_PC,
   RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
   RE_AARCH64_TLSDESC_PAGE,
diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s
index 985ab302259ea3..ef871d2af93671 100644
--- a/lld/test/ELF/aarch64-got-relocations-pauth.s
+++ b/lld/test/ELF/aarch64-got-relocations-pauth.s
@@ -77,6 +77,79 @@ _start:
   adrp x1, :got_auth:zed
   add  x1, x1, :got_auth_lo12:zed
 
+#--- ok-tiny.s
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux ok-tiny.s -o ok-tiny.o
+
+# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny
+# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY
+
+# RUN:

[llvm-branch-commits] [lld] release/19.x: [lld][WebAssembly] Fix use of uninitialized stack data with --wasm64 (#107780) (PR #119723)

2024-12-15 Thread Adam Bratschi-Kaye via llvm-branch-commits

adambratschikaye wrote:

@dschuff @nikic : Is there something that needs to be done to bump the LLVM 
version?

https://github.com/llvm/llvm-project/pull/119723
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)

2024-12-15 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 edited 
https://github.com/llvm/llvm-project/pull/113817
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed TLSDESC (PR #113817)

2024-12-15 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 updated 
https://github.com/llvm/llvm-project/pull/113817

From e903e06eb6cfccfd65964ada48081e2ae1b2befd Mon Sep 17 00:00:00 2001
From: Daniil Kovalev 
Date: Fri, 25 Oct 2024 12:32:27 +0300
Subject: [PATCH 01/10] [PAC][lld][AArch64][ELF] Support signed TLSDESC

Support `R_AARCH64_AUTH_TLSDESC_ADR_PAGE21`, `R_AARCH64_AUTH_TLSDESC_LD64_LO12`
and `R_AARCH64_AUTH_TLSDESC_ADD_LO12` static TLSDESC relocations.
---
 lld/ELF/Arch/AArch64.cpp |   8 ++
 lld/ELF/InputSection.cpp |   2 +
 lld/ELF/Relocations.cpp  |  38 +++-
 lld/ELF/Relocations.h|   2 +
 lld/ELF/Symbols.h|   1 +
 lld/ELF/SyntheticSections.cpp|   5 +
 lld/test/ELF/aarch64-tlsdesc-pauth.s | 134 +++
 7 files changed, 188 insertions(+), 2 deletions(-)
 create mode 100644 lld/test/ELF/aarch64-tlsdesc-pauth.s

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9571e0e9566fc3..68c9c5e20e0f94 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -157,9 +157,14 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
 return RE_AARCH64_AUTH;
   case R_AARCH64_TLSDESC_ADR_PAGE21:
 return RE_AARCH64_TLSDESC_PAGE;
+  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
+return RE_AARCH64_AUTH_TLSDESC_PAGE;
   case R_AARCH64_TLSDESC_LD64_LO12:
   case R_AARCH64_TLSDESC_ADD_LO12:
 return R_TLSDESC;
+  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
+  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
+return RE_AARCH64_AUTH_TLSDESC;
   case R_AARCH64_TLSDESC_CALL:
 return R_TLSDESC_CALL;
   case R_AARCH64_TLSLE_ADD_TPREL_HI12:
@@ -542,6 +547,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_ADR_PREL_PG_HI21:
   case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
   case R_AARCH64_TLSDESC_ADR_PAGE21:
+  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
 checkInt(ctx, loc, val, 33, rel);
 [[fallthrough]];
   case R_AARCH64_ADR_PREL_PG_HI21_NC:
@@ -592,6 +598,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
   case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_LD64_LO12:
+  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
 checkAlignment(ctx, loc, val, 8, rel);
 write32Imm12(loc, getBits(val, 3, 11));
 break;
@@ -666,6 +673,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
 break;
   case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
   case R_AARCH64_TLSDESC_ADD_LO12:
+  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
 write32Imm12(loc, val);
 break;
   case R_AARCH64_TLSDESC:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 26dc5c606f57f9..8b9f687b34f308 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -965,12 +965,14 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
   case R_SIZE:
 return r.sym->getSize() + a;
   case R_TLSDESC:
+  case RE_AARCH64_AUTH_TLSDESC:
 return ctx.in.got->getTlsDescAddr(*r.sym) + a;
   case R_TLSDESC_PC:
 return ctx.in.got->getTlsDescAddr(*r.sym) + a - p;
   case R_TLSDESC_GOTPLT:
 return ctx.in.got->getTlsDescAddr(*r.sym) + a - ctx.in.gotPlt->getVA();
   case RE_AARCH64_TLSDESC_PAGE:
+  case RE_AARCH64_AUTH_TLSDESC_PAGE:
 return getAArch64Page(ctx.in.got->getTlsDescAddr(*r.sym) + a) -
getAArch64Page(p);
   case RE_LOONGARCH_TLSDESC_PAGE_PC:
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 07cbdb7806fde1..088f2e2298d1fa 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1326,6 +1326,36 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
 return 1;
   }
 
+  auto fatalBothAuthAndNonAuth = [&sym]() {
+fatal("both AUTH and non-AUTH TLSDESC entries for '" + sym.getName() +
+  "' requested, but only one type of TLSDESC entry per symbol is "
+  "supported");
+  };
+
+  // Do not optimize signed TLSDESC as described in pauthabielf64 to LE/IE.
+  // https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#general-restrictions
+  // > PAUTHELF64 only supports the descriptor based TLS (TLSDESC).
+  if (oneof<RE_AARCH64_AUTH_TLSDESC_PAGE, RE_AARCH64_AUTH_TLSDESC>(expr)) {
+assert(ctx.arg.emachine == EM_AARCH64);
+if (!sym.hasFlag(NEEDS_TLSDESC))
+  sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_AUTH);
+else if (!sym.hasFlag(NEEDS_TLSDESC_AUTH))
+  fatalBothAuthAndNonAuth();
+sec->addReloc({expr, type, offset, addend, &sym});
+return 1;
+  }
+
+  if (sym.hasFlag(NEEDS_TLSDESC_AUTH)) {
+assert(ctx.arg.emachine == EM_AARCH64);
+// TLSDESC_CALL hint relocation probably should not be emitted by compiler
+// with signed TLSDESC enabled since it does not give any value, but leave a
+// check against that just in case someone uses it.
+if (expr != R_TLSDESC_CALL)
+  fatalBothAuthAndNonAuth();
+return 1;
+  }
+
   bool isRISCV = ctx.arg.emachine

[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)

2024-12-15 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 updated 
https://github.com/llvm/llvm-project/pull/113816

From de7feb154caec76b1ddf705689d1e7e8b4b2c491 Mon Sep 17 00:00:00 2001
From: Daniil Kovalev 
Date: Fri, 25 Oct 2024 21:28:18 +0300
Subject: [PATCH 1/9] [PAC][lld][AArch64][ELF] Support signed GOT with tiny
 code model

Support `R_AARCH64_AUTH_GOT_ADR_PREL_LO21` and `R_AARCH64_AUTH_GOT_LD_PREL19`
GOT-generating relocations.
---
 lld/ELF/Arch/AArch64.cpp |  5 ++
 lld/ELF/InputSection.cpp |  1 +
 lld/ELF/Relocations.cpp  | 11 +--
 lld/ELF/Relocations.h|  1 +
 lld/test/ELF/aarch64-got-relocations-pauth.s | 73 
 5 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9571e0e9566fc3..b63551d0f682e5 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -205,6 +205,9 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
   case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
   case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
 return RE_AARCH64_AUTH_GOT;
+  case R_AARCH64_AUTH_GOT_LD_PREL19:
+  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
+return RE_AARCH64_AUTH_GOT_PC;
   case R_AARCH64_LD64_GOTPAGE_LO15:
 return RE_AARCH64_GOT_PAGE;
   case R_AARCH64_ADR_GOT_PAGE:
@@ -548,6 +551,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
 write32AArch64Addr(loc, val >> 12);
 break;
   case R_AARCH64_ADR_PREL_LO21:
+  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
 checkInt(ctx, loc, val, 21, rel);
 write32AArch64Addr(loc, val);
 break;
@@ -568,6 +572,7 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
   case R_AARCH64_CONDBR19:
   case R_AARCH64_LD_PREL_LO19:
   case R_AARCH64_GOT_LD_PREL19:
+  case R_AARCH64_AUTH_GOT_LD_PREL19:
 checkAlignment(ctx, loc, val, 4, rel);
 checkInt(ctx, loc, val, 21, rel);
 writeMaskedBits32le(loc, (val & 0x1C) << 3, 0x1C << 3);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 26dc5c606f57f9..76af4ec4193a4f 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -817,6 +817,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
   case RE_AARCH64_GOT_PAGE:
 return r.sym->getGotVA(ctx) + a - getAArch64Page(ctx.in.got->getVA());
   case R_GOT_PC:
+  case RE_AARCH64_AUTH_GOT_PC:
   case R_RELAX_TLS_GD_TO_IE:
 return r.sym->getGotVA(ctx) + a - p;
   case R_GOTPLT_GOTREL:
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 07cbdb7806fde1..690b4933bb809e 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -197,8 +197,9 @@ static bool needsPlt(RelExpr expr) {
 }
 
 bool lld::elf::needsGot(RelExpr expr) {
-  return oneof(
   expr);
@@ -981,7 +982,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type,
 R_GOTPLTONLY_PC, R_PLT_PC, R_PLT_GOTREL, R_PLT_GOTPLT,
 R_GOTPLT_GOTREL, R_GOTPLT_PC, RE_PPC32_PLTREL, RE_PPC64_CALL_PLT,
 RE_PPC64_RELAX_TOC, RE_RISCV_ADD, RE_AARCH64_GOT_PAGE,
-RE_AARCH64_AUTH_GOT, RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT,
+RE_AARCH64_AUTH_GOT, RE_AARCH64_AUTH_GOT_PC,
+RE_LOONGARCH_PLT_PAGE_PC, RE_LOONGARCH_GOT,
 RE_LOONGARCH_GOT_PAGE_PC>(e))
 return true;
 
@@ -1096,7 +1098,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
 } else if (!sym.isTls() || ctx.arg.emachine != EM_LOONGARCH) {
   // Many LoongArch TLS relocs reuse the RE_LOONGARCH_GOT type, in which
   // case the NEEDS_GOT flag shouldn't get set.
-  if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC)
+  if (expr == RE_AARCH64_AUTH_GOT || expr == RE_AARCH64_AUTH_GOT_PAGE_PC ||
+  expr == RE_AARCH64_AUTH_GOT_PC)
 sym.setFlags(NEEDS_GOT | NEEDS_GOT_AUTH);
   else
 sym.setFlags(NEEDS_GOT | NEEDS_GOT_NONAUTH);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index d993ab77adc3cc..fde25a230b72e6 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -95,6 +95,7 @@ enum RelExpr {
   RE_AARCH64_AUTH_GOT_PAGE_PC,
   RE_AARCH64_GOT_PAGE,
   RE_AARCH64_AUTH_GOT,
+  RE_AARCH64_AUTH_GOT_PC,
   RE_AARCH64_PAGE_PC,
   RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC,
   RE_AARCH64_TLSDESC_PAGE,
diff --git a/lld/test/ELF/aarch64-got-relocations-pauth.s b/lld/test/ELF/aarch64-got-relocations-pauth.s
index a577e81ad0d035..f4db44e0bfb24e 100644
--- a/lld/test/ELF/aarch64-got-relocations-pauth.s
+++ b/lld/test/ELF/aarch64-got-relocations-pauth.s
@@ -77,6 +77,79 @@ _start:
   adrp x1, :got_auth:zed
   add  x1, x1, :got_auth_lo12:zed
 
+#--- ok-tiny.s
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux ok-tiny.s -o ok-tiny.o
+
+# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny
+# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY
+
+# RUN: l

[llvm-branch-commits] [lld] [PAC][lld][AArch64][ELF] Support signed GOT with tiny code model (PR #113816)

2024-12-15 Thread Daniil Kovalev via llvm-branch-commits


@@ -77,6 +77,60 @@ _start:
   adrp x1, :got_auth:zed
   add  x1, x1, :got_auth_lo12:zed
 
+#--- ok-tiny.s
+# RUN: llvm-mc -filetype=obj -triple=aarch64 ok-tiny.s -o ok-tiny.o
+
+# RUN: ld.lld ok-tiny.o a.so -pie -o external-tiny
+# RUN: llvm-readelf -r -S -x .got external-tiny | FileCheck %s --check-prefix=EXTERNAL-TINY

kovdan01 wrote:

Thanks for the suggestion, fixed in 7d949458a8c0ce85e52f558159298090fa3bf529, and
also applied the same to #113815

https://github.com/llvm/llvm-project/pull/113816
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)

2024-12-15 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/116524

From bf0d13553b2bc2124a266e398976ba80a1114580 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Sat, 14 Dec 2024 16:34:47 +0100
Subject: [PATCH 1/4] [mlir][Vector] Move mask materialization patterns to
 greedy rewrite

The mask materialization patterns during `VectorToLLVM` are rewrite patterns. 
They should run as part of the greedy pattern rewrite and not the dialect 
conversion. (Rewrite patterns and conversion patterns are not generally 
compatible.)

The current combination of rewrite patterns and conversion patterns triggered 
an edge case when merging the 1:1 and 1:N dialect conversions.
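For context, rewrite patterns and conversion patterns go through different
drivers, which is why mixing them is fragile. A minimal sketch of the two
entry points, not part of the patch (header paths omitted, error handling
simplified; the populate* calls are the ones the pass below uses):

// Sketch only. Rewrite patterns: greedy driver, fixpoint iteration with
// folding/DCE, no legality target or type converter involved.
using namespace mlir;
void lowerVectorSketch(Operation *op, MLIRContext *ctx,
                       LLVMTypeConverter &converter, bool force32Bit) {
  RewritePatternSet greedy(ctx);
  populateVectorMaskMaterializationPatterns(greedy, force32Bit);
  (void)applyPatternsAndFoldGreedily(op, std::move(greedy));

  // Conversion patterns: dialect-conversion driver, legality decided by a
  // ConversionTarget, types bridged by the LLVMTypeConverter.
  RewritePatternSet conv(ctx);
  populateVectorToLLVMConversionPatterns(converter, conv);
  LLVMConversionTarget target(*ctx);
  if (failed(applyPartialConversion(op, target, std::move(conv))))
    op->emitError("vector-to-llvm conversion failed");
}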
---
 .../VectorToLLVM/ConvertVectorToLLVMPass.cpp  |  7 +-
 .../VectorToLLVM/vector-mask-to-llvm.mlir |  4 +-
 .../VectorToLLVM/vector-to-llvm.mlir  |  4 +-
 .../VectorToLLVM/vector-xfer-to-llvm.mlir | 80 +--
 4 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index 4623b9667998cc..64a9ad8e9bade0 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -61,8 +61,8 @@ struct ConvertVectorToLLVMPass
 } // namespace
 
 void ConvertVectorToLLVMPass::runOnOperation() {
-  // Perform progressive lowering of operations on slices and
-  // all contraction operations. Also applies folding and DCE.
+  // Perform progressive lowering of operations on slices and all contraction
+  // operations. Also materializes masks, applies folding and DCE.
   {
 RewritePatternSet patterns(&getContext());
 populateVectorToVectorCanonicalizationPatterns(patterns);
@@ -76,6 +76,8 @@ void ConvertVectorToLLVMPass::runOnOperation() {
 VectorTransformsOptions());
 // Vector transfer ops with rank > 1 should be lowered with VectorToSCF.
 populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
+populateVectorMaskMaterializationPatterns(patterns,
+  force32BitVectorIndices);
 (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
   }
 
@@ -83,7 +85,6 @@ void ConvertVectorToLLVMPass::runOnOperation() {
   LowerToLLVMOptions options(&getContext());
   LLVMTypeConverter converter(&getContext(), options);
   RewritePatternSet patterns(&getContext());
-  populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices);
   populateVectorTransferLoweringPatterns(patterns);
   populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
   populateVectorToLLVMConversionPatterns(
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
index 82351eb7c98a43..91e5358622b69d 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
@@ -7,7 +7,7 @@
 // CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32
// CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32>
// CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32>
-// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi32>
+// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32>
 // CMP32: return %[[T4]] : vector<11xi1>
 
 // CMP64-LABEL: @genbool_var_1d(
@@ -16,7 +16,7 @@
 // CMP64: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i64
// CMP64: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi64>
// CMP64: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi64>
-// CMP64: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi64>
+// CMP64: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi64>
 // CMP64: return %[[T4]] : vector<11xi1>
 
 func.func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2473fe933ffcb2..ea88fece9e662d 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -3097,7 +3097,7 @@ func.func @create_mask_0d(%num_elems : index) -> vector<i1> {
// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
// CHECK:  %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
// CHECK:  %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector<i1>
-// CHECK:  %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector<i1>
+// CHECK:  %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector<i1>
// CHECK:  return %[[RESULT]] : vector<i1>
 
 

[llvm-branch-commits] [lld] 42e6efd - Revert "[LLD][COFF] Introduce hybrid symbol table for EC input files on ARM64…"

2024-12-15 Thread via llvm-branch-commits

Author: Jacek Caban
Date: 2024-12-15T22:30:10+01:00
New Revision: 42e6efd5b07177f5a7f529b36022c98724e2df40

URL: https://github.com/llvm/llvm-project/commit/42e6efd5b07177f5a7f529b36022c98724e2df40
DIFF: https://github.com/llvm/llvm-project/commit/42e6efd5b07177f5a7f529b36022c98724e2df40.diff

LOG: Revert "[LLD][COFF] Introduce hybrid symbol table for EC input files on ARM64…"

This reverts commit a8206e7b37929f4754806667680ffba0206eef95.

Added: 


Modified: 
lld/COFF/COFFLinkerContext.h
lld/COFF/Driver.cpp
lld/COFF/InputFiles.cpp
lld/COFF/InputFiles.h
lld/COFF/SymbolTable.h
lld/test/COFF/arm64ec-codemap.test
lld/test/COFF/arm64ec-entry-thunk.s
lld/test/COFF/arm64ec-lib.test
lld/test/COFF/arm64ec-range-thunks.s

Removed: 
lld/test/COFF/arm64x-symtab.s



diff  --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h
index bdd625b8c3916b..5d89e97a7f7761 100644
--- a/lld/COFF/COFFLinkerContext.h
+++ b/lld/COFF/COFFLinkerContext.h
@@ -32,27 +32,6 @@ class COFFLinkerContext : public CommonLinkerContext {
   SymbolTable symtab;
   COFFOptTable optTable;
 
-  // A hybrid ARM64EC symbol table on ARM64X target.
-  std::optional<SymbolTable> hybridSymtab;
-
-  // Pointer to the ARM64EC symbol table: either symtab for an ARM64EC target or
-  // hybridSymtab for an ARM64X target.
-  SymbolTable *symtabEC = nullptr;
-
-  // Returns the appropriate symbol table for the specified machine type.
-  SymbolTable &getSymtab(llvm::COFF::MachineTypes machine) {
-if (hybridSymtab && (machine == ARM64EC || machine == AMD64))
-  return *hybridSymtab;
-return symtab;
-  }
-
-  // Invoke the specified callback for each symbol table.
-  void forEachSymtab(std::function<void(SymbolTable &)> f) {
-f(symtab);
-if (hybridSymtab)
-  f(*hybridSymtab);
-  }
-
   std::vector<ObjFile *> objFileInstances;
   std::map<std::string, PDBInputFile *> pdbInputFileInstances;
   std::vector<ImportFile *> importFileInstances;

diff  --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 64be0413f86ea1..0705f1c1be9992 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -596,17 +596,7 @@ void LinkerDriver::setMachine(MachineTypes machine) {
   assert(machine != IMAGE_FILE_MACHINE_UNKNOWN);
 
   ctx.config.machine = machine;
-
-  if (machine != ARM64X) {
-ctx.symtab.machine = machine;
-if (machine == ARM64EC)
-  ctx.symtabEC = &ctx.symtab;
-  } else {
-ctx.symtab.machine = ARM64;
-ctx.hybridSymtab.emplace(ctx, ARM64EC);
-ctx.symtabEC = &*ctx.hybridSymtab;
-  }
-
+  ctx.symtab.machine = machine;
   addWinSysRootLibSearchPaths();
 }
 
@@ -2529,56 +2519,54 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
   if (config->imageBase == uint64_t(-1))
 config->imageBase = getDefaultImageBase();
 
-  ctx.forEachSymtab([&](SymbolTable &symtab) {
-symtab.addSynthetic(mangle("__ImageBase"), nullptr);
-if (symtab.machine == I386) {
-  symtab.addAbsolute("___safe_se_handler_table", 0);
-  symtab.addAbsolute("___safe_se_handler_count", 0);
-}
-
-symtab.addAbsolute(mangle("__guard_fids_count"), 0);
-symtab.addAbsolute(mangle("__guard_fids_table"), 0);
-symtab.addAbsolute(mangle("__guard_flags"), 0);
-symtab.addAbsolute(mangle("__guard_iat_count"), 0);
-symtab.addAbsolute(mangle("__guard_iat_table"), 0);
-symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
-symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
-// Needed for MSVC 2017 15.5 CRT.
-symtab.addAbsolute(mangle("__enclave_config"), 0);
-// Needed for MSVC 2019 16.8 CRT.
-symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
-symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);
-
-if (isArm64EC(ctx.config.machine)) {
-  symtab.addAbsolute("__arm64x_extra_rfe_table", 0);
-  symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0);
-  symtab.addAbsolute("__arm64x_redirection_metadata", 0);
-  symtab.addAbsolute("__arm64x_redirection_metadata_count", 0);
-  symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0);
-  symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0);
-  symtab.addAbsolute("__hybrid_auxiliary_iat", 0);
-  symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0);
-  symtab.addAbsolute("__hybrid_code_map", 0);
-  symtab.addAbsolute("__hybrid_code_map_count", 0);
-  symtab.addAbsolute("__hybrid_image_info_bitfield", 0);
-  symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0);
-  symtab.addAbsolute("__x64_code_ranges_to_entry_points_count", 0);
-  symtab.addSynthetic("__guard_check_icall_a64n_fptr", nullptr);
-  symtab.addSynthetic("__arm64x_native_entrypoint", nullptr);
-}
-
-if (config->pseudoRelocs) {
-  symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
-  symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
-}
-if (config->mingw) {
-  symtab.addAbsolute(mangle("__CTOR_LIS

[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)

2024-12-15 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/116524

From bc93c7840aa0d9d361f6f7aab08ec59b786bab2a Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Sun, 15 Dec 2024 17:36:49 +0100
Subject: [PATCH] ex

---
 .../lib/Optimizer/CodeGen/BoxedProcedure.cpp  |   1 -
 mlir/docs/DialectConversion.md|  35 +-
 .../mlir/Transforms/DialectConversion.h   |  18 +-
 .../Conversion/LLVMCommon/TypeConverter.cpp   |  14 +-
 .../EmitC/Transforms/TypeConversions.cpp  |   1 -
 .../Dialect/Linalg/Transforms/Detensorize.cpp |   1 -
 .../Quant/Transforms/StripFuncQuantTypes.cpp  |   1 -
 .../Utils/SparseTensorDescriptor.cpp  |   3 -
 .../Vector/Transforms/VectorLinearize.cpp |   1 -
 .../Transforms/Utils/DialectConversion.cpp| 432 +-
 mlir/test/Transforms/test-legalizer.mlir  |   7 +-
 .../Func/TestDecomposeCallGraphTypes.cpp  |   2 +-
 mlir/test/lib/Dialect/Test/TestPatterns.cpp   |   1 -
 .../lib/Transforms/TestDialectConversion.cpp  |   1 -
 14 files changed, 227 insertions(+), 291 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index 1bb91d252529f0..104ae7408b80c1 100644
--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -172,7 +172,6 @@ class BoxprocTypeRewriter : public mlir::TypeConverter {
 addConversion([&](TypeDescType ty) {
   return TypeDescType::get(convertType(ty.getOfTy()));
 });
-addArgumentMaterialization(materializeProcedure);
 addSourceMaterialization(materializeProcedure);
 addTargetMaterialization(materializeProcedure);
   }
diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md
index 3168f5e13c7515..abacd5a82c61eb 100644
--- a/mlir/docs/DialectConversion.md
+++ b/mlir/docs/DialectConversion.md
@@ -242,19 +242,6 @@ cannot. These materializations are used by the conversion framework to ensure
 type safety during the conversion process. There are several types of
 materializations depending on the situation.
 
-*   Argument Materialization
-
--   An argument materialization is used when converting the type of a block
-argument during a [signature conversion](#region-signature-conversion).
-The new block argument types are specified in a `SignatureConversion`
-object. An original block argument can be converted into multiple
-block arguments, which is not supported everywhere in the dialect
-conversion. (E.g., adaptors support only a single replacement value for
-each original value.) Therefore, an argument materialization is used to
-convert potentially multiple new block arguments back into a single SSA
-value. An argument materialization is also used when replacing an op
-result with multiple values.
-
 *   Source Materialization
 
 -   A source materialization is used when a value was replaced with a value
@@ -343,17 +330,6 @@ class TypeConverter {
   /// Materialization functions must be provided when a type conversion may
   /// persist after the conversion has finished.
 
-  /// This method registers a materialization that will be called when
-  /// converting (potentially multiple) block arguments that were the result of
-  /// a signature conversion of a single block argument, to a single SSA value
-  /// with the old argument type.
-  template <typename FnT, typename T = typename llvm::function_traits<std::decay_t<FnT>>::template arg_t<1>>
-  void addArgumentMaterialization(FnT &&callback) {
-argumentMaterializations.emplace_back(
-wrapMaterialization(std::forward<FnT>(callback)));
-  }
-
   /// This method registers a materialization that will be called when
   /// converting a replacement value back to its original source type.
   /// This is used when some uses of the original value persist beyond the main
@@ -406,12 +382,11 @@ done explicitly via a conversion pattern.
 To convert the types of block arguments within a Region, a custom hook on the
 `ConversionPatternRewriter` must be invoked; `convertRegionTypes`. This hook
 uses a provided type converter to apply type conversions to all blocks of a
-given region. As noted above, the conversions performed by this method use the
-argument materialization hook on the `TypeConverter`. This hook also takes an
-optional `TypeConverter::SignatureConversion` parameter that applies a custom
-conversion to the entry block of the region. The types of the entry block
-arguments are often tied semantically to the operation, e.g.,
-`func::FuncOp`, `AffineForOp`, etc.
+given region. This hook also takes an optional
+`TypeConverter::SignatureConversion` parameter that applies a custom conversion
+to the entry block of the region. The types of the entry block arguments are
+often tied semantically to the operation, e.g., `func::FuncOp`, `AffineForOp`,
+etc.
 
 To convert the signature of just one given block, the
 `applySignatureConversion` hook can be used.
diff -
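For readers following the doc change: with argument materializations removed,
a TypeConverter carries only source and target materializations. A
hypothetical, self-contained sketch (i64->i32 is an arbitrary example; none
of this code is from the patch):

// Illustrative only: converts i64 to i32 and registers the two remaining
// materialization kinds.
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Transforms/DialectConversion.h"

mlir::TypeConverter makeConverterSketch() {
  mlir::TypeConverter converter;
  converter.addConversion([](mlir::Type t) { return t; }); // fallback: keep
  converter.addConversion(
      [](mlir::IntegerType t) -> std::optional<mlir::Type> {
        if (t.getWidth() == 64)
          return mlir::IntegerType::get(t.getContext(), 32);
        return t;
      });
  // Source materialization: converted i32 back to the original i64, for
  // uses that still expect the old type when conversion finishes.
  converter.addSourceMaterialization(
      [](mlir::OpBuilder &b, mlir::Type type, mlir::ValueRange inputs,
         mlir::Location loc) -> mlir::Value {
        return b.create<mlir::arith::ExtSIOp>(loc, type, inputs[0]);
      });
  // Target materialization: original i64 into the converted i32, for
  // patterns that need operands already in the new type system.
  converter.addTargetMaterialization(
      [](mlir::OpBuilder &b, mlir::Type type, mlir::ValueRange inputs,
         mlir::Location loc) -> mlir::Value {
        return b.create<mlir::arith::TruncIOp>(loc, type, inputs[0]);
      });
  return converter;
}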

[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)

2024-12-15 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/116524

From bf0d13553b2bc2124a266e398976ba80a1114580 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Sat, 14 Dec 2024 16:34:47 +0100
Subject: [PATCH 1/4] [mlir][Vector] Move mask materialization patterns to
 greedy rewrite

The mask materialization patterns during `VectorToLLVM` are rewrite patterns. 
They should run as part of the greedy pattern rewrite and not the dialect 
conversion. (Rewrite patterns and conversion patterns are not generally 
compatible.)

The current combination of rewrite patterns and conversion patterns triggered 
an edge case when merging the 1:1 and 1:N dialect conversions.
---
 .../VectorToLLVM/ConvertVectorToLLVMPass.cpp  |  7 +-
 .../VectorToLLVM/vector-mask-to-llvm.mlir |  4 +-
 .../VectorToLLVM/vector-to-llvm.mlir  |  4 +-
 .../VectorToLLVM/vector-xfer-to-llvm.mlir | 80 +--
 4 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index 4623b9667998cc..64a9ad8e9bade0 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -61,8 +61,8 @@ struct ConvertVectorToLLVMPass
 } // namespace
 
 void ConvertVectorToLLVMPass::runOnOperation() {
-  // Perform progressive lowering of operations on slices and
-  // all contraction operations. Also applies folding and DCE.
+  // Perform progressive lowering of operations on slices and all contraction
+  // operations. Also materializes masks, applies folding and DCE.
   {
 RewritePatternSet patterns(&getContext());
 populateVectorToVectorCanonicalizationPatterns(patterns);
@@ -76,6 +76,8 @@ void ConvertVectorToLLVMPass::runOnOperation() {
 VectorTransformsOptions());
 // Vector transfer ops with rank > 1 should be lowered with VectorToSCF.
 populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
+populateVectorMaskMaterializationPatterns(patterns,
+  force32BitVectorIndices);
 (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
   }
 
@@ -83,7 +85,6 @@ void ConvertVectorToLLVMPass::runOnOperation() {
   LowerToLLVMOptions options(&getContext());
   LLVMTypeConverter converter(&getContext(), options);
   RewritePatternSet patterns(&getContext());
-  populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices);
   populateVectorTransferLoweringPatterns(patterns);
   populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
   populateVectorToLLVMConversionPatterns(
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
index 82351eb7c98a43..91e5358622b69d 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
@@ -7,7 +7,7 @@
 // CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32
// CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32>
// CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32>
-// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi32>
+// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32>
 // CMP32: return %[[T4]] : vector<11xi1>
 
 // CMP64-LABEL: @genbool_var_1d(
@@ -16,7 +16,7 @@
 // CMP64: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i64
// CMP64: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi64>
// CMP64: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi64>
-// CMP64: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi64>
+// CMP64: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi64>
 // CMP64: return %[[T4]] : vector<11xi1>
 
 func.func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2473fe933ffcb2..ea88fece9e662d 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -3097,7 +3097,7 @@ func.func @create_mask_0d(%num_elems : index) -> vector<i1> {
// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
// CHECK:  %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
// CHECK:  %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector<i1>
-// CHECK:  %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector<i1>
+// CHECK:  %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector<i1>
// CHECK:  return %[[RESULT]] : vector<i1>
 
 

[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect Conversion: No target mat. for 1:N replacement (PR #117513)

2024-12-15 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer edited 
https://github.com/llvm/llvm-project/pull/117513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)

2024-12-15 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/116524

From eff9c47de3405dc542644d5d64e5a26f793214d0 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Sun, 15 Dec 2024 17:36:49 +0100
Subject: [PATCH] ex

---
 .../lib/Optimizer/CodeGen/BoxedProcedure.cpp  |   1 -
 mlir/docs/DialectConversion.md|  35 +-
 .../mlir/Transforms/DialectConversion.h   |  18 +-
 .../Conversion/LLVMCommon/TypeConverter.cpp   |  14 +-
 .../EmitC/Transforms/TypeConversions.cpp  |   1 -
 .../Dialect/Linalg/Transforms/Detensorize.cpp |   1 -
 .../Quant/Transforms/StripFuncQuantTypes.cpp  |   1 -
 .../Utils/SparseTensorDescriptor.cpp  |   3 -
 .../Vector/Transforms/VectorLinearize.cpp |   1 -
 .../Transforms/Utils/DialectConversion.cpp| 357 --
 mlir/test/Transforms/test-legalizer.mlir  |   7 +-
 .../Func/TestDecomposeCallGraphTypes.cpp  |   2 +-
 mlir/test/lib/Dialect/Test/TestPatterns.cpp   |   1 -
 .../lib/Transforms/TestDialectConversion.cpp  |   1 -
 14 files changed, 168 insertions(+), 275 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index 1bb91d252529f0..104ae7408b80c1 100644
--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -172,7 +172,6 @@ class BoxprocTypeRewriter : public mlir::TypeConverter {
 addConversion([&](TypeDescType ty) {
   return TypeDescType::get(convertType(ty.getOfTy()));
 });
-addArgumentMaterialization(materializeProcedure);
 addSourceMaterialization(materializeProcedure);
 addTargetMaterialization(materializeProcedure);
   }
diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md
index 3168f5e13c7515..abacd5a82c61eb 100644
--- a/mlir/docs/DialectConversion.md
+++ b/mlir/docs/DialectConversion.md
@@ -242,19 +242,6 @@ cannot. These materializations are used by the conversion framework to ensure
 type safety during the conversion process. There are several types of
 materializations depending on the situation.
 
-*   Argument Materialization
-
--   An argument materialization is used when converting the type of a block
-argument during a [signature conversion](#region-signature-conversion).
-The new block argument types are specified in a `SignatureConversion`
-object. An original block argument can be converted into multiple
-block arguments, which is not supported everywhere in the dialect
-conversion. (E.g., adaptors support only a single replacement value for
-each original value.) Therefore, an argument materialization is used to
-convert potentially multiple new block arguments back into a single SSA
-value. An argument materialization is also used when replacing an op
-result with multiple values.
-
 *   Source Materialization
 
 -   A source materialization is used when a value was replaced with a value
@@ -343,17 +330,6 @@ class TypeConverter {
   /// Materialization functions must be provided when a type conversion may
   /// persist after the conversion has finished.
 
-  /// This method registers a materialization that will be called when
-  /// converting (potentially multiple) block arguments that were the result of
-  /// a signature conversion of a single block argument, to a single SSA value
-  /// with the old argument type.
-  template <typename FnT, typename T = typename llvm::function_traits<std::decay_t<FnT>>::template arg_t<1>>
-  void addArgumentMaterialization(FnT &&callback) {
-argumentMaterializations.emplace_back(
-wrapMaterialization(std::forward<FnT>(callback)));
-  }
-
   /// This method registers a materialization that will be called when
   /// converting a replacement value back to its original source type.
   /// This is used when some uses of the original value persist beyond the main
@@ -406,12 +382,11 @@ done explicitly via a conversion pattern.
 To convert the types of block arguments within a Region, a custom hook on the
 `ConversionPatternRewriter` must be invoked; `convertRegionTypes`. This hook
 uses a provided type converter to apply type conversions to all blocks of a
-given region. As noted above, the conversions performed by this method use the
-argument materialization hook on the `TypeConverter`. This hook also takes an
-optional `TypeConverter::SignatureConversion` parameter that applies a custom
-conversion to the entry block of the region. The types of the entry block
-arguments are often tied semantically to the operation, e.g.,
-`func::FuncOp`, `AffineForOp`, etc.
+given region. This hook also takes an optional
+`TypeConverter::SignatureConversion` parameter that applies a custom conversion
+to the entry block of the region. The types of the entry block arguments are
+often tied semantically to the operation, e.g., `func::FuncOp`, `AffineForOp`,
+etc.
 
 To convert the signature of just one given block, the
 `applySignatureConversion` hook can be used.
diff -

[llvm-branch-commits] [llvm] RegAlloc: Do not fatal error if there are no registers in the alloc order (PR #119640)

2024-12-15 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Dec 15, 8:41 PM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/119640).


https://github.com/llvm/llvm-project/pull/119640
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] RegAlloc: Fix failure on undef use when all registers are reserved (PR #119647)

2024-12-15 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Dec 15, 8:41 PM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/119647).


https://github.com/llvm/llvm-project/pull/119647
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] RegAlloc: Fix verifier error after failed allocation (PR #119690)

2024-12-15 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/119690

From 2c3b6e52f9cb028c579f19379a59440a9dcbaba5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 25 Mar 2022 20:27:39 -0400
Subject: [PATCH] RegAlloc: Fix verifier error after failed allocation

In some cases after reporting an allocation failure, this would fail
the verifier. It picks the first allocatable register and assigns it,
but didn't update the liveness appropriately. When VirtRegRewriter
relied on the liveness to set kill flags, it would incorrectly add
kill flags if there was another overlapping kill of the virtual
register.

We can't properly assign the register to an overlapping range, so
break the liveness of the failing register (and any other interfering
registers) instead. Give the virtual register dummy liveness by
effectively deleting all the uses by setting them to undef.

The edge case not tested here which I'm worried about is if the read
of the register is a def of a subregister. I've been unable to come up
with a test where this occurs.

https://reviews.llvm.org/D122616
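Distilled, the cleanup amounts to the following (a paraphrase of the code in
the diff below, with illustrative names; not a separate API):

// Make every read of a junk vreg undef so no operand demands a live value,
// then shrink the interval so no stale segment can later yield kill flags.
static void neutralizeJunkVReg(llvm::MachineRegisterInfo &MRI,
                               llvm::LiveIntervals &LIS,
                               llvm::Register JunkReg) {
  for (llvm::MachineOperand &MO : MRI.reg_operands(JunkReg))
    if (MO.readsReg())
      MO.setIsUndef(true);
  LIS.shrinkToUses(&LIS.getInterval(JunkReg));
}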
---
 llvm/lib/CodeGen/RegAllocBase.cpp | 36 +++
 llvm/lib/CodeGen/RegAllocBase.h   |  6 ++
 llvm/lib/CodeGen/RegAllocBasic.cpp|  1 +
 llvm/lib/CodeGen/RegAllocGreedy.cpp   |  1 +
 .../AMDGPU/illegal-eviction-assert.mir|  4 +-
 llvm/test/CodeGen/AMDGPU/issue48473.mir   |  3 +-
 ...ut-of-registers-error-all-regs-reserved.ll |  8 +--
 ...lloc-failure-overlapping-insert-assert.mir |  7 +--
 ...ster-killed-error-after-alloc-failure0.mir | 59 +++
 ...ister-killed-error-after-alloc-failure1.ll | 30 ++
 .../remaining-virtual-register-operands.ll|  3 +-
 llvm/test/CodeGen/X86/inline-asm-assertion.ll |  2 +-
 12 files changed, 144 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll

diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp
index 980a6756963d9f..bb0c8a32a7bc92 100644
--- a/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
   Matrix = &mat;
   MRI->freezeReservedRegs();
   RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+  FailedVRegs.clear();
 }
 
 // Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
 
   // Keep going after reporting the error.
   VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
+  FailedVRegs.insert(VirtReg->reg());
 } else if (AvailablePhysReg)
   Matrix->assign(*VirtReg, AvailablePhysReg);
 
@@ -161,6 +163,40 @@ void RegAllocBase::postOptimization() {
   DeadRemats.clear();
 }
 
+void RegAllocBase::cleanupFailedVRegs() {
+  SmallSet<Register, 8> JunkRegs;
+
+  for (Register FailedReg : FailedVRegs) {
+JunkRegs.insert(FailedReg);
+
+MCRegister PhysReg = VRM->getPhys(FailedReg);
+LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
+
+// The liveness information for the failed register and anything interfering
+// with the physical register we arbitrarily chose is junk and needs to be
+// deleted.
+for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+  LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
+  for (const LiveInterval *InterferingReg : Q.interferingVRegs())
+JunkRegs.insert(InterferingReg->reg());
+}
+  }
+
+  // TODO: Probably need to set undef on any physreg uses not associated with
+  // a virtual register.
+  for (Register JunkReg : JunkRegs) {
+// We still should produce valid IR. Kill all the uses and reduce the live
+// ranges so that we don't think it's possible to introduce kill flags
+// later which will fail the verifier.
+for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
+  if (MO.readsReg())
+MO.setIsUndef(true);
+}
+
+LIS->shrinkToUses(&LIS->getInterval(JunkReg));
+  }
+}
+
 void RegAllocBase::enqueue(const LiveInterval *LI) {
   const Register Reg = LI->reg();
 
diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h
index 5bd52da61f2dc5..1fdbab694bb0e3 100644
--- a/llvm/lib/CodeGen/RegAllocBase.h
+++ b/llvm/lib/CodeGen/RegAllocBase.h
@@ -37,6 +37,7 @@
 #define LLVM_LIB_CODEGEN_REGALLOCBASE_H
 
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
   /// always available for the remat of all the siblings of the original reg.
  SmallPtrSet<MachineInstr *, 32> DeadRemats;
 
+  SmallSet<Register, 8> FailedVRegs;
   RegAllocBase(const RegAllocFilterFunc F = nullptr)
   : shouldAlloc

[llvm-branch-commits] [llvm] AMDGPU: Delete spills of undef values (PR #119684)

2024-12-15 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/119684

From 69aa81bb9763c092605e9445f61657314f8c390d Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 28 Mar 2022 11:24:48 -0400
Subject: [PATCH 1/2] AMDGPU: Delete spills of undef values

It would be a bit more logical to preserve the undef and do the normal
expansion, but this is less work. This avoids verifier errors in a
future patch which starts deleting liveness from registers after
allocation failures which results in spills of undef values.

https://reviews.llvm.org/D122607
---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 12 ++
 .../AMDGPU/sgpr-spill-partially-undef.mir | 42 +++
 .../AMDGPU/spill-agpr-partially-undef.mir | 34 +++
 llvm/test/CodeGen/AMDGPU/vgpr-spill.mir   | 34 +++
 4 files changed, 122 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2f5a99e5de5e3e..925d0d5eed2ef0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1956,6 +1956,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
LiveIntervals *LIS, bool OnlyToVGPR,
bool SpillToPhysVGPRLane) const {
+  if (MI->getOperand(0).isUndef()) {
+if (Indexes)
+  Indexes->removeMachineInstrFromMaps(*MI);
+MI->eraseFromParent();
+return true;
+  }
+
   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
 
  ArrayRef<SpilledReg> VGPRSpills =
@@ -2377,6 +2384,11 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
+  if (VData->isUndef()) {
+MI->eraseFromParent();
+return true;
+  }
+
   assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
  MFI->getStackPtrOffsetReg());
 
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
index 774785fb3966fc..d352e8a13da9f1 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
@@ -54,3 +54,45 @@ body: |
    SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
 
 ...
+
+---
+name:  sgpr_spill_s32_undef
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+body: |
+  bb.0:
+; CHECK-LABEL: name: sgpr_spill_s32_undef
+; CHECK: body:
+; CHECK-NEXT: bb.0:
+; CHECK-NOT: {{.+}}
+; CHECK: ...
+SI_SPILL_S32_SAVE undef $sgpr8, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s32) into %stack.0, align 4, addrspace 5)
+
+...
+
+---
+name:  sgpr_spill_s64_undef
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4, stack-id: sgpr-spill }
+body: |
+  bb.0:
+; CHECK-LABEL: name: sgpr_spill_s64_undef
+; CHECK: body:
+; CHECK-NEXT: bb.0:
+; CHECK-NOT: {{.+}}
+; CHECK: ...
+SI_SPILL_S64_SAVE undef $sgpr8_sgpr9, %stack.0, implicit $exec, implicit 
$sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, 
align 4, addrspace 5)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir 
b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
index c825674de7652c..b02b6e79d7a76f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
@@ -71,3 +71,37 @@ body: |
 ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed 
$agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
 SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit 
$exec :: (store (s64) into %stack.0, addrspace 5)
 ...
+
+---
+name: spill_a32_undef
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+body: |
+  bb.0:
+; CHECK-LABEL: name: spill_a32_undef

[llvm-branch-commits] [flang] [mlir] [mlir][Transforms] Support 1:N mappings in `ConversionValueMapping` (PR #116524)

2024-12-15 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer updated 
https://github.com/llvm/llvm-project/pull/116524

>From bf0d13553b2bc2124a266e398976ba80a1114580 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Sat, 14 Dec 2024 16:34:47 +0100
Subject: [PATCH 1/4] [mlir][Vector] Move mask materialization patterns to
 greedy rewrite

The mask materialization patterns during `VectorToLLVM` are rewrite patterns. 
They should run as part of the greedy pattern rewrite and not the dialect 
conversion. (Rewrite patterns and conversion patterns are not generally 
compatible.)

The current combination of rewrite patterns and conversion patterns triggered 
an edge case when merging the 1:1 and 1:N dialect conversions.
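
For context, a minimal sketch of the two drivers involved; the entry points
are the ones used in the diff below, but the pass state (`converter`,
`target`, `force32BitVectorIndices`) is assumed rather than shown:

``cpp
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

void lowerVectorOps(mlir::Operation *op, mlir::MLIRContext *ctx,
                    mlir::LLVMTypeConverter &converter,
                    mlir::ConversionTarget &target,
                    bool force32BitVectorIndices) {
  // Rewrite patterns: applied to a fixpoint by the greedy driver.
  mlir::RewritePatternSet greedy(ctx);
  mlir::populateVectorMaskMaterializationPatterns(greedy,
                                                  force32BitVectorIndices);
  (void)mlir::applyPatternsAndFoldGreedily(op, std::move(greedy));

  // Conversion patterns: applied by the dialect conversion driver against a
  // ConversionTarget. The two drivers are not generally interchangeable,
  // which is what this patch untangles.
  mlir::RewritePatternSet conversion(ctx);
  mlir::populateVectorToLLVMConversionPatterns(converter, conversion);
  (void)mlir::applyPartialConversion(op, target, std::move(conversion));
}
``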
---
 .../VectorToLLVM/ConvertVectorToLLVMPass.cpp  |  7 +-
 .../VectorToLLVM/vector-mask-to-llvm.mlir |  4 +-
 .../VectorToLLVM/vector-to-llvm.mlir  |  4 +-
 .../VectorToLLVM/vector-xfer-to-llvm.mlir | 80 +--
 4 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp 
b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index 4623b9667998cc..64a9ad8e9bade0 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -61,8 +61,8 @@ struct ConvertVectorToLLVMPass
 } // namespace
 
 void ConvertVectorToLLVMPass::runOnOperation() {
-  // Perform progressive lowering of operations on slices and
-  // all contraction operations. Also applies folding and DCE.
+  // Perform progressive lowering of operations on slices and all contraction
+  // operations. Also materializes masks, applies folding and DCE.
   {
 RewritePatternSet patterns(&getContext());
 populateVectorToVectorCanonicalizationPatterns(patterns);
@@ -76,6 +76,8 @@ void ConvertVectorToLLVMPass::runOnOperation() {
 VectorTransformsOptions());
 // Vector transfer ops with rank > 1 should be lowered with VectorToSCF.
 populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1);
+populateVectorMaskMaterializationPatterns(patterns,
+  force32BitVectorIndices);
 (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
   }
 
@@ -83,7 +85,6 @@ void ConvertVectorToLLVMPass::runOnOperation() {
   LowerToLLVMOptions options(&getContext());
   LLVMTypeConverter converter(&getContext(), options);
   RewritePatternSet patterns(&getContext());
-  populateVectorMaskMaterializationPatterns(patterns, force32BitVectorIndices);
   populateVectorTransferLoweringPatterns(patterns);
   populateVectorToLLVMMatrixConversionPatterns(converter, patterns);
   populateVectorToLLVMConversionPatterns(
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir 
b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
index 82351eb7c98a43..91e5358622b69d 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
@@ -7,7 +7,7 @@
 // CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32
 // CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : 
vector<11xi32>
 // CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0] : vector<11xi32>
-// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi32>
+// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32>
 // CMP32: return %[[T4]] : vector<11xi1>
 
 // CMP64-LABEL: @genbool_var_1d(
@@ -16,7 +16,7 @@
 // CMP64: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i64
 // CMP64: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : 
vector<11xi64>
 // CMP64: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0] : vector<11xi64>
-// CMP64: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<11xi64>
+// CMP64: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi64>
 // CMP64: return %[[T4]] : vector<11xi1>
 
 func.func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir 
b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2473fe933ffcb2..ea88fece9e662d 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -3097,7 +3097,7 @@ func.func @create_mask_0d(%num_elems : index) -> vector<i1> {
 // CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
 // CHECK:  %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
 // CHECK:  %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector<i32>
-// CHECK:  %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector<i32>
+// CHECK:  %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector<i32>
 // CHECK:  return %[[RESULT]] : vector<i1>
 
 

[llvm-branch-commits] [clang] [llvm] AMDGPU: Fix libcall recognition of image array types (PR #119832)

2024-12-15 Thread Sameer Sahasrabuddhe via llvm-branch-commits


@@ -622,9 +622,9 @@ bool ItaniumParamParser::parseItaniumParam(StringRef& param,
   if (isDigit(TC)) {
 res.ArgType =
        StringSwitch<AMDGPULibFunc::EType>(eatLengthPrefixedName(param))
-.Case("ocl_image1darray", AMDGPULibFunc::IMG1DA)
-.Case("ocl_image1dbuffer", AMDGPULibFunc::IMG1DB)
-.Case("ocl_image2darray", AMDGPULibFunc::IMG2DA)
+.StartsWith("ocl_image1d_array", AMDGPULibFunc::IMG1DA)
+.StartsWith("ocl_image1d_buffer", AMDGPULibFunc::IMG1DB)
+.StartsWith("ocl_image2d_array", AMDGPULibFunc::IMG2DA)

ssahasra wrote:

Shouldn't this change also fix the mangling generated in `getItaniumTypeName`?
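
For illustration, the practical difference is that `StartsWith` tolerates
trailing components in the mangled name, which an exact `Case` does not; in
the sketch below, the `_ro` suffix is an assumed example of such a trailing
component, not something taken from this patch:

``cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Sketch: classify an OpenCL image type name by prefix.
static int classifyImage(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .StartsWith("ocl_image1d_array", 1)
      .StartsWith("ocl_image1d_buffer", 2)
      .StartsWith("ocl_image2d_array", 3)
      .Default(0);
}

// classifyImage("ocl_image1d_array_ro") == 1, whereas an exact
// .Case("ocl_image1d_array") would fail to match the suffixed name.
``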

https://github.com/llvm/llvm-project/pull/119832
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Fix libcall recognition of image array types (PR #119832)

2024-12-15 Thread Sameer Sahasrabuddhe via llvm-branch-commits

https://github.com/ssahasra approved this pull request.


https://github.com/llvm/llvm-project/pull/119832
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] AMDGPU: Fix libcall recognition of image array types (PR #119832)

2024-12-15 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Dec 16, 12:57 AM EST**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/119832).


https://github.com/llvm/llvm-project/pull/119832
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)

2024-12-15 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/119983


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][BoundsChecking] Add TrapBB local variable (PR #119983)

2024-12-15 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka ready_for_review 
https://github.com/llvm/llvm-project/pull/119983
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)

2024-12-15 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/120038.diff


1 Files Affected:

- (added) compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp (+28) 


``diff
diff --git a/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp 
b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp
new file mode 100644
index 00..92e9e62dbc8698
--- /dev/null
+++ b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp
@@ -0,0 +1,28 @@
+// RUN: %clangxx -fsanitize=local-bounds %s -O3 -o %t && %run %t 1
+// RUN: %clangxx -fsanitize=local-bounds %s -O3 -o %t && not --crash %run %t 3
+
+// FIXME: it always traps for now.
+
+#include 
+
+struct S {
+  int k;
+  int l;
+};
+
+__attribute__((noinline)) void init(S *s) {
+  __asm__ __volatile__("" : : "r"(s) : "memory");
+}
+
+__attribute__((noinline, no_sanitize("memory"))) int test(char i) {
+  S a;
+  init(&a);
+  S b;
+  init(&b);
+  return ((int*)(&a))[i];
+}
+
+int main(int argc, char **argv) {
+  test(argv[1][0] - '0');
+  return 0;
+}

``
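
Conceptually, `-fsanitize=local-bounds` guards the indexed access with a
bounds check that branches to a trap, which is why the out-of-bounds run is
expected to crash. A hand-written sketch of the instrumented load, not actual
compiler output:

``llvm
declare void @llvm.trap()

define i32 @test(i64 %i) {
entry:
  %a = alloca [2 x i32], align 4                        ; the local object
  %p = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %ok = icmp ult i64 %i, 2                              ; index in bounds?
  br i1 %ok, label %ld, label %trap

ld:
  %v = load i32, ptr %p, align 4
  ret i32 %v

trap:
  call void @llvm.trap()                                ; the FIXME: always a trap
  unreachable
}
``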




https://github.com/llvm/llvm-project/pull/120038
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)

2024-12-15 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/120038

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [ubsan] Add runtime test for -fsanitize=local-bounds (PR #120038)

2024-12-15 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff b42a2ec4a07d94c6c0d73d4baedf2ffef3d3825c 
1e33ed956935df967b130f30e8e7b701f18304d3 --extensions cpp -- 
compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp
``





View the diff from clang-format here.


``diff
diff --git a/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp 
b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp
index 92e9e62dbc..edfe439c92 100644
--- a/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp
+++ b/compiler-rt/test/ubsan/TestCases/Misc/local_bounds.cpp
@@ -19,7 +19,7 @@ __attribute__((noinline, no_sanitize("memory"))) int 
test(char i) {
   init(&a);
   S b;
   init(&b);
-  return ((int*)(&a))[i];
+  return ((int *)(&a))[i];
 }
 
 int main(int argc, char **argv) {

``




https://github.com/llvm/llvm-project/pull/120038
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Delete spills of undef values (PR #119684)

2024-12-15 Thread Matt Arsenault via llvm-branch-commits


@@ -54,3 +54,45 @@ body: |
 SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, 
implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into 
%stack.0, align 4, addrspace 5)
 
 ...
+
+---
+name:  sgpr_spill_s32_undef
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  hasSpilledSGPRs: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+body: |
+  bb.0:
+; CHECK-LABEL: name: sgpr_spill_s32_undef
+; CHECK: body:
+; CHECK-NEXT: bb.0:
+; CHECK-NOT: {{.+}}

arsenm wrote:

A -NEXT check would be wrong here because there is a blank line, and I 
couldn't get a regex that matches a blank line to work, so I did this 
instead. Really, update_mir_test_checks should understand how to match an 
empty block; anything here is a hack.
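
For what it's worth, FileCheck's `CHECK-EMPTY:` directive matches exactly one
blank line, so a possible alternative (untested against this exact output)
would be:

``
; CHECK-LABEL: name: sgpr_spill_s32_undef
; CHECK:       body:
; CHECK-NEXT:  bb.0:
; CHECK-EMPTY:
; CHECK-NEXT:  ...
``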

https://github.com/llvm/llvm-project/pull/119684
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits