[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100513

>From 80b236530103a66b8939aeb26f1d5c2be9043b5c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 2 Jul 2024 21:28:30 +0200
Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max

---
 .../SLPVectorizer/AMDGPU/min_max.ll   | 366 ++
 1 file changed, 366 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll 
b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
new file mode 100644
index 0..47b0dbd6b2cff
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @uadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @uadd_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @uadd_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @usub_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @usub_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @usub_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @sadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extract

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100519

None

>From c98dcbf907a6b5d085b89f06d49ee8a3bc3e9dd2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  67 +-
 .../Analysis/CostModel/X86/arith-overflow.ll  | 120 +-
 .../CostModel/X86/intrinsic-cost-kinds.ll |   6 +-
 3 files changed, 99 insertions(+), 94 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index ba745262d1890..2d907d87b057c 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> undef

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100520

None

>From 689ea8720d60ae6fc1226b929f5333adae1ce77c Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100519
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100518
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100522

None

>From 330c0e2bf40cf96b1c7778636fa739cb0c1a1f11 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:47:03 +0400
Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics

---
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 341 +
 1 file changed, 341 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..133b95609bc15
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+declare i64@llvm.abs.i64(i64, i1 immarg)
+declare <2 x i64>  @llvm.abs.v2i64(<2 x i64>, i1 immarg)
+declare <4 x i64>  @llvm.abs.v4i64(<4 x i64>, i1 immarg)
+declare <5 x i64>  @llvm.abs.v5i64(<5 x i64>, i1 immarg)
+declare <8 x i64>  @llvm.abs.v8i64(<8 x i64>, i1 immarg)
+
+declare i32@llvm.abs.i32(i32, i1 immarg)
+declare <2 x i32>  @llvm.abs.v2i32(<2 x i32>, i1 immarg)
+declare <4 x i32>  @llvm.abs.v4i32(<4 x i32>, i1 immarg)
+declare <8 x i32>  @llvm.abs.v8i32(<8 x i32>, i1 immarg)
+declare <9 x i32>  @llvm.abs.v9i32(<9 x i32>, i1 immarg)
+declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg)
+
+declare i16@llvm.abs.i16(i16, i1 immarg)
+declare <2 x i16>  @llvm.abs.v2i16(<2 x i16>, i1 immarg)
+declare <4 x i16>  @llvm.abs.v4i16(<4 x i16>, i1 immarg)
+declare <8 x i16>  @llvm.abs.v8i16(<8 x i16>, i1 immarg)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg)
+declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg)
+declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg)
+
+declare i8 @llvm.abs.i8(i8, i1 immarg)
+declare <2 x i8>   @llvm.abs.v2i8(<2 x i8>, i1 immarg)
+declare <4 x i8>   @llvm.abs.v4i8(<4 x i8>, i1 immarg)
+declare <8 x i8>   @llvm.abs.v8i8(<8 x i8>, i1 immarg)
+declare <16 x i8>  @llvm.abs.v16i8(<16 x i8>, i1 immarg)
+declare <32 x i8>  @llvm.abs.v32i8(<32 x i8>, i1 immarg)
+declare <33 x i8>  @llvm.abs.v33i8(<33 x i8>, i1 immarg)
+declare <64 x i8>  @llvm.abs.v64i8(<64 x i8>, i1 immarg)
+
+define i32 @abs_nonpoison(i32 %arg) {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100520
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈
* **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.


Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100521
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/100523

None

>From ca78bfb62816c21172101c1f00dcead3efc472dc Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h   | 32 +
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++---
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index 133b95609bc15..623e02eb8239d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for i

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-downstack-mergeability-warning"
> >on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a> 👈
* **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about 
stacking</a>.


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-downstack-mergeability-warning"
> >on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#100523** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a> 👈
* **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon"
 target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="10px" height="10px"/></a>
* `main`

This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about 
stacking</a>.


 Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" 
width="11px" height="11px"/> Graphite</a>
  

https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100518
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100519
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100520
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 40.56 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100519.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+36-31) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-overflow.ll (+60-60) 
- (modified) llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll (+3-3) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index ba745262d1890..2d907d87b057c 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 for in

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/100520.diff


1 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+29-24) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

``




https://github.com/llvm/llvm-project/pull/100520
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100521
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-analysis

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 127.35 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100521.diff


6 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+31-25) 
- (modified) llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll (+76-76) 
- (modified) llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll (+58-58) 
- (modified) llvm/test/Analysis/CostModel/X86/fptoi_sat.ll (+200-200) 
- (modified) llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll 
(+35-44) 
- (modified) llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll 
(+35-14) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 1a089a3fa9634..ba70498bfb731 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2179,31 +2179,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::UMULO;
   break;
 case Intrinsic::fptosi_sat:
-case Intrinsic::fptoui_sat: {
-  if (Tys.empty())
-break;
-  Type *FromTy = Tys[0];
-  bool IsSigned = IID == Intrinsic::fptosi_sat;
-
-  InstructionCost Cost = 0;
-  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
- {FromTy, FromTy});
-  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
-  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
- {FromTy, FromTy});
-  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
-  Cost += thisT()->getCastInstrCost(
-  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
-  TTI::CastContextHint::None, CostKind);
-  if (IsSigned) {
-Type *CondTy = RetTy->getWithNewBitWidth(1);
-Cost += thisT()->getCmpSelInstrCost(
-BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
-Cost += thisT()->getCmpSelInstrCost(
-BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, 
CostKind);
-  }
-  return Cost;
-}
+  ISD = ISD::FP_TO_SINT_SAT;
+  break;
+case Intrinsic::fptoui_sat:
+  ISD = ISD::FP_TO_UINT_SAT;
+  break;
 case Intrinsic::ctpop:
   ISD = ISD::CTPOP;
   // In case of legalization use TCC_Expensive. This is cheaper than a
@@ -2418,6 +2398,32 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::fptosi_sat:
+case Intrinsic::fptoui_sat: {
+  if (Tys.empty())
+break;
+  Type *FromTy = Tys[0];
+  bool IsSigned = IID == Intrinsic::fptosi_sat;
+
+  InstructionCost Cost = 0;
+  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
+ {FromTy, FromTy});
+  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
+  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
+ {FromTy, FromTy});
+  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
+  Cost += thisT()->getCastInstrCost(
+  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
+  TTI::CastContextHint::None, CostKind);
+  if (IsSigned) {
+Type *CondTy = RetTy->getWithNewBitWidth(1);
+Cost += thisT()->getCmpSelInstrCost(
+BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
+Cost += thisT()->getCmpSelInstrCost(
+BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, 
CostKind);
+  }
+  return Cost;
+}
 default:
   break;
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll 
b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
index e4e29143985b2..6fbcf2a14da7d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll
@@ -34,8 +34,8 @@ define void @casts() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: 
%v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: 
%v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: 
%v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated co

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 245.20 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100518.diff


8 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+52-38) 
- (modified) llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll (+12-12) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-overflow.ll (+152-152) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-ssat.ll (+86-86) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-usat.ll (+84-84) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll (+44-44) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll (+44-44) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll 
(+44-44) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c842e4a2c4320..a89d4fe467eb9 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2180,44 +2180,17 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return Cost;
 }
 case Intrinsic::sadd_with_overflow:
-case Intrinsic::ssub_with_overflow: {
-  Type *SumTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
-? BinaryOperator::Add
-: BinaryOperator::Sub;
-
-  //   Add:
-  //   Overflow -> (Result < LHS) ^ (RHS < 0)
-  //   Sub:
-  //   Overflow -> (Result < LHS) ^ (RHS > 0)
-  InstructionCost Cost = 0;
-  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-  Cost += 2 * thisT()->getCmpSelInstrCost(
-  Instruction::ICmp, SumTy, OverflowTy,
-  CmpInst::ICMP_SGT, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
-  CostKind);
-  return Cost;
-}
+  ISD = ISD::SADDO;
+  break;
+case Intrinsic::ssub_with_overflow:
+  ISD = ISD::SSUBO;
+  break;
 case Intrinsic::uadd_with_overflow:
-case Intrinsic::usub_with_overflow: {
-  Type *SumTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
-? BinaryOperator::Add
-: BinaryOperator::Sub;
-  CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
-? CmpInst::ICMP_ULT
-: CmpInst::ICMP_UGT;
-
-  InstructionCost Cost = 0;
-  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-  Cost +=
-  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
-  Pred, CostKind);
-  return Cost;
-}
+  ISD = ISD::UADDO;
+  break;
+case Intrinsic::usub_with_overflow:
+  ISD = ISD::USUBO;
+  break;
 case Intrinsic::smul_with_overflow:
 case Intrinsic::umul_with_overflow: {
   Type *MulTy = RetTy->getContainedType(0);
@@ -2296,8 +2269,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   break;
 }
 
+auto *ST = dyn_cast<StructType>(RetTy);
+Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
+std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);
+
 const TargetLoweringBase *TLI = getTLI();
-std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
 
 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
   if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@@ -2353,6 +2329,44 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Pred, CostKind);
   return Cost;
 }
+case Intrinsic::sadd_with_overflow:
+case Intrinsic::ssub_with_overflow: {
+  Type *SumTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
+? BinaryOperator::Add
+: BinaryOperator::Sub;
+
+  //   Add:
+  //   Overflow -> (Result < LHS) ^ (RHS < 0)
+  //   Sub:
+  //   Overflow -> (Result < LHS) ^ (RHS > 0)
+  InstructionCost Cost = 0;
+  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
+  Cost +=
+  2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
+  CmpInst::ICMP_SGT, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
+  CostKind);
+  return Cost;
+}
+case Intrinsic::uadd_with_overflow:
+case Intrinsic::usub_

[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 245.20 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100518.diff


8 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+52-38) 
- (modified) llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll (+12-12) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-overflow.ll (+152-152) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-ssat.ll (+86-86) 
- (modified) llvm/test/Analysis/CostModel/ARM/arith-usat.ll (+84-84) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll (+44-44) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll (+44-44) 
- (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll 
(+44-44) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c842e4a2c4320..a89d4fe467eb9 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2180,44 +2180,17 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return Cost;
 }
 case Intrinsic::sadd_with_overflow:
-case Intrinsic::ssub_with_overflow: {
-  Type *SumTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
-? BinaryOperator::Add
-: BinaryOperator::Sub;
-
-  //   Add:
-  //   Overflow -> (Result < LHS) ^ (RHS < 0)
-  //   Sub:
-  //   Overflow -> (Result < LHS) ^ (RHS > 0)
-  InstructionCost Cost = 0;
-  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-  Cost += 2 * thisT()->getCmpSelInstrCost(
-  Instruction::ICmp, SumTy, OverflowTy,
-  CmpInst::ICMP_SGT, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
-  CostKind);
-  return Cost;
-}
+  ISD = ISD::SADDO;
+  break;
+case Intrinsic::ssub_with_overflow:
+  ISD = ISD::SSUBO;
+  break;
 case Intrinsic::uadd_with_overflow:
-case Intrinsic::usub_with_overflow: {
-  Type *SumTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
-? BinaryOperator::Add
-: BinaryOperator::Sub;
-  CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
-? CmpInst::ICMP_ULT
-: CmpInst::ICMP_UGT;
-
-  InstructionCost Cost = 0;
-  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
-  Cost +=
-  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
-  Pred, CostKind);
-  return Cost;
-}
+  ISD = ISD::UADDO;
+  break;
+case Intrinsic::usub_with_overflow:
+  ISD = ISD::USUBO;
+  break;
 case Intrinsic::smul_with_overflow:
 case Intrinsic::umul_with_overflow: {
   Type *MulTy = RetTy->getContainedType(0);
@@ -2296,8 +2269,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   break;
 }
 
+auto *ST = dyn_cast<StructType>(RetTy);
+Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
+std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);
+
 const TargetLoweringBase *TLI = getTLI();
-std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
 
 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
   if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@@ -2353,6 +2329,44 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Pred, CostKind);
   return Cost;
 }
+case Intrinsic::sadd_with_overflow:
+case Intrinsic::ssub_with_overflow: {
+  Type *SumTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
+? BinaryOperator::Add
+: BinaryOperator::Sub;
+
+  //   Add:
+  //   Overflow -> (Result < LHS) ^ (RHS < 0)
+  //   Sub:
+  //   Overflow -> (Result < LHS) ^ (RHS > 0)
+  InstructionCost Cost = 0;
+  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
+  Cost +=
+  2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
+  CmpInst::ICMP_SGT, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
+  CostKind);
+  return Cost;
+}
+case Intrinsic::uadd_with_overflow:
+case Intrinsic::usub_w

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 36.04 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100522.diff


1 Files Affected:

- (added) llvm/test/Analysis/CostModel/AMDGPU/abs.ll (+341) 


``diff
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..133b95609bc15
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+declare i64        @llvm.abs.i64(i64, i1 immarg)
+declare <2 x i64>  @llvm.abs.v2i64(<2 x i64>, i1 immarg)
+declare <4 x i64>  @llvm.abs.v4i64(<4 x i64>, i1 immarg)
+declare <5 x i64>  @llvm.abs.v5i64(<5 x i64>, i1 immarg)
+declare <8 x i64>  @llvm.abs.v8i64(<8 x i64>, i1 immarg)
+
+declare i32        @llvm.abs.i32(i32, i1 immarg)
+declare <2 x i32>  @llvm.abs.v2i32(<2 x i32>, i1 immarg)
+declare <4 x i32>  @llvm.abs.v4i32(<4 x i32>, i1 immarg)
+declare <8 x i32>  @llvm.abs.v8i32(<8 x i32>, i1 immarg)
+declare <9 x i32>  @llvm.abs.v9i32(<9 x i32>, i1 immarg)
+declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg)
+
+declare i16        @llvm.abs.i16(i16, i1 immarg)
+declare <2 x i16>  @llvm.abs.v2i16(<2 x i16>, i1 immarg)
+declare <4 x i16>  @llvm.abs.v4i16(<4 x i16>, i1 immarg)
+declare <8 x i16>  @llvm.abs.v8i16(<8 x i16>, i1 immarg)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg)
+declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg)
+declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg)
+
+declare i8 @llvm.abs.i8(i8, i1 immarg)
+declare <2 x i8>   @llvm.abs.v2i8(<2 x i8>, i1 immarg)
+declare <4 x i8>   @llvm.abs.v4i8(<4 x i8>, i1 immarg)
+declare <8 x i8>   @llvm.abs.v8i8(<8 x i8>, i1 immarg)
+declare <16 x i8>  @llvm.abs.v16i8(<16 x i8>, i1 immarg)
+declare <32 x i8>  @llvm.abs.v32i8(<32 x i8>, i1 immarg)
+declare <33 x i8>  @llvm.abs.v33i8(<33 x i8>, i1 immarg)
+declare <64 x i8>  @llvm.abs.v64i8(<64 x i8>, i1 immarg)
+
+define i32 @abs_nonpoison(i32 %arg) {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/100523.diff


2 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+18-14) 
- (modified) llvm/test/Analysis/CostModel/AMDGPU/abs.ll (+20-20) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index 133b95609bc15..623e02eb8239d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 f

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/100523.diff


2 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+18-14) 
- (modified) llvm/test/Analysis/CostModel/AMDGPU/abs.ll (+20-20) 


``diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index 133b95609bc15..623e02eb8239d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 fo

[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/96760
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/100546
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/100546

Backport 6da6772bf0a33131aa8540c9d4f60d5db75c32b5

Requested by: @kmclaughlin-arm

>From 6271d3d03846216502487711f330227a4a6d3022 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin 
Date: Wed, 24 Jul 2024 14:30:25 +0100
Subject: [PATCH] [AArch64][SME] Rewrite __arm_get_current_vg to preserve
 required registers (#100143)

The documentation for the __arm_get_current_vg support routine specifies
that the following registers are call-preserved:
 - X1-X15, X19-X29 and SP
 - Z0-Z31
 - P0-P15

This patch rewrites the implementation of this routine in compiler-rt,
as the current version does not guarantee that these registers will be
preserved.

(cherry picked from commit 6da6772bf0a33131aa8540c9d4f60d5db75c32b5)
---
 compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 28 
 compiler-rt/lib/builtins/aarch64/sme-abi.S| 44 +++
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c 
b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
index 062cf80fc6848..20061012e16c6 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c
@@ -10,15 +10,6 @@ struct FEATURES {
 
 extern struct FEATURES __aarch64_cpu_features;
 
-struct SME_STATE {
-  long PSTATE;
-  long TPIDR2_EL0;
-};
-
-extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible;
-
-extern bool __aarch64_has_sme_and_tpidr2_el0;
-
 #if __GNUC__ >= 9
 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
 #endif
@@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void 
get_aarch64_cpu_features(void) {
 
   __init_cpu_features();
 }
-
-__attribute__((target("sve"))) long
-__arm_get_current_vg(void) __arm_streaming_compatible {
-  struct SME_STATE State = __arm_sme_state();
-  unsigned long long features =
-      __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED);
-  bool HasSVE = features & (1ULL << FEAT_SVE);
-
-  if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0)
-    return 0;
-
-  if (HasSVE || (State.PSTATE & 1)) {
-    long vl;
-    __asm__ __volatile__("cntd %0" : "=r"(vl));
-    return vl;
-  }
-
-  return 0;
-}
diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S 
b/compiler-rt/lib/builtins/aarch64/sme-abi.S
index 4c0ff66931db7..cd8153f60670f 100644
--- a/compiler-rt/lib/builtins/aarch64/sme-abi.S
+++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S
@@ -12,11 +12,15 @@
 #if !defined(__APPLE__)
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
 #define TPIDR2_SYMBOL_OFFSET 
:lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
 #else
 // MachO requires @page/@pageoff directives because the global is defined
 // in a different file. Otherwise this file may fail to build.
 #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
 #define TPIDR2_SYMBOL_OFFSET 
SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
+#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
 #endif
 
 .arch armv9-a+sme
@@ -180,6 +184,46 @@ 
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
   ret
 END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
 
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
+  .variant_pcs __arm_get_current_vg
+  BTI_C
+
+  stp x29, x30, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  mov x29, sp
+  .cfi_def_cfa w29, 16
+  .cfi_offset w30, -8
+  .cfi_offset w29, -16
+  adrp    x17, CPU_FEATS_SYMBOL
+  ldr     w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+  tbnz    w17, #30, 0f
+  adrp    x16, TPIDR2_SYMBOL
+  ldrb    w16, [x16, TPIDR2_SYMBOL_OFFSET]
+  cbz     w16, 1f
+0:
+  mov     x18, x1
+  bl      __arm_sme_state
+  mov     x1, x18
+  and     x17, x17, #0x40000000
+  bfxil   x17, x0, #0, #1
+  cbz     x17, 1f
+  cntd    x0
+  .cfi_def_cfa wsp, 16
+  ldp x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+1:
+  mov x0, xzr
+  .cfi_def_cfa wsp, 16
+  ldp x29, x30, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore w30
+  .cfi_restore w29
+  ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
+
 NO_EXEC_STACK_DIRECTIVE
 
 // GNU property note for BTI and PAC

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:

@sdesmalen-arm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/100546
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)

2024-07-25 Thread Sander de Smalen via llvm-branch-commits

sdesmalen-arm wrote:

It would be great if we could merge this fix into the release branch!

https://github.com/llvm/llvm-project/pull/100546
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [flang] release/19.x: [Flang][Driver] Enable config file options (#100343) (PR #100541)

2024-07-25 Thread Paul Osmialowski via llvm-branch-commits

https://github.com/pawosm-arm approved this pull request.


https://github.com/llvm/llvm-project/pull/100541
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Simon Pilgrim via llvm-branch-commits


@@ -42,75 +42,50 @@ define i32 @umax(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 
= call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 
= call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = 
call i32 @llvm.umax.i32(i32 undef, i32 undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = 
call i32 @llvm.umax.i32(i32 undef, i32 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 
= call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 
= call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: 
%V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = 
call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 
= call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: 
%V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: 
%V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: 
%V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: 
%V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = 
call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 
= call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 
= call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I8 
= call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: 
%V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: 
%V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = 
call i16 @llvm.umax.i16(i16 undef, i16 undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 
= call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = 
call i8 @llvm.umax.i8(i8 undef, i8 undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 
= call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 
= call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 
= call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: 
%V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> un

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Jay Foad via llvm-branch-commits

https://github.com/jayfoad edited 
https://github.com/llvm/llvm-project/pull/100513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Jay Foad via llvm-branch-commits

https://github.com/jayfoad approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/100513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)

2024-07-25 Thread Jay Foad via llvm-branch-commits


@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 
-passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @uadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @uadd_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @uadd_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @usub_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @usub_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @usub_sat_v2i16(
+; GFX9-NEXT:  bb:
+; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT:ret <2 x i16> [[TMP0]]
+;
+bb:
+  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
+  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+  %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
+  %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
+  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+  ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
+; GFX7-LABEL: @sadd_sat_v2i16(
+; GFX7-NEXT:  bb:
+; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 
[[ARG1_0]])
+; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 
[[ARG1_1]])
+; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], 
i64 0
+; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 
[[ADD_1]], i64 1
+; GFX7-NEXT:ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @sadd_sat_v2i16(
+; GFX8-NEXT:  bb:
+; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> 
[[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT:ret <2 x i16> [[T

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Jay Foad via llvm-branch-commits


@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)

jayfoad wrote:

What is this demonstrating? 2 does not seem like the right cost for any 
VALU/SALU operation on v32i16.

https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)

2024-07-25 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/99508

>From 1d99939c020aab8650cd20df24e0b1e71726ae90 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Wed, 17 Jul 2024 13:26:09 +0100
Subject: [PATCH 1/3] [MLIR][OpenMP] Automate operand structure definition

This patch adds the "gen-openmp-clause-ops" `mlir-tblgen` generator to produce
the structure definitions previously in OpenMPClauseOperands.h automatically
from the information contained in OpenMPOps.td and OpenMPClauses.td.

Changes introduced to the `ElementsAttrBase` common tablegen class, as well as
some of its subclasses, add more fine-grained information on their shape and
type of their elements. This information is needed in order to properly
generate the corresponding types to represent these attributes within the
produced operand structures.

The original header is maintained to enable the definition of similar
structures that are not directly related to any single `OpenMP_Clause` or
`OpenMP_Op` tablegen definition.
---
 .../mlir/Dialect/OpenMP/CMakeLists.txt|   1 +
 .../Dialect/OpenMP/OpenMPClauseOperands.h | 290 +-
 mlir/include/mlir/IR/CommonAttrConstraints.td |  18 +-
 mlir/test/mlir-tblgen/openmp-clause-ops.td|  78 +
 mlir/tools/mlir-tblgen/OmpOpGen.cpp   | 174 ++-
 5 files changed, 266 insertions(+), 295 deletions(-)
 create mode 100644 mlir/test/mlir-tblgen/openmp-clause-ops.td

diff --git a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt 
b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt
index d3422f6e48b06..23ccba3067bcb 100644
--- a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt
@@ -17,6 +17,7 @@ mlir_tablegen(OpenMPOpsDialect.h.inc -gen-dialect-decls 
-dialect=omp)
 mlir_tablegen(OpenMPOpsDialect.cpp.inc -gen-dialect-defs -dialect=omp)
 mlir_tablegen(OpenMPOps.h.inc -gen-op-decls)
 mlir_tablegen(OpenMPOps.cpp.inc -gen-op-defs)
+mlir_tablegen(OpenMPClauseOps.h.inc -gen-openmp-clause-ops)
 mlir_tablegen(OpenMPOpsTypes.h.inc -gen-typedef-decls -typedefs-dialect=omp)
 mlir_tablegen(OpenMPOpsTypes.cpp.inc -gen-typedef-defs -typedefs-dialect=omp)
 mlir_tablegen(OpenMPOpsEnums.h.inc -gen-enum-decls)
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index f4a87d52a172e..e5b4de4908966 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -23,303 +23,31 @@
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc"
 
+#include "mlir/Dialect/OpenMP/OpenMPClauseOps.h.inc"
+
 namespace mlir {
 namespace omp {
 
 
//===--===//
-// Mixin structures defining MLIR operands associated with each OpenMP clause.
+// Extra clause operand structures.
 
//===--===//
 
-struct AlignedClauseOps {
-  llvm::SmallVector alignedVars;
-  llvm::SmallVector alignments;
-};
-
-struct AllocateClauseOps {
-  llvm::SmallVector allocateVars, allocatorVars;
-};
-
-struct CancelDirectiveNameClauseOps {
-  ClauseCancellationConstructTypeAttr cancelDirective;
-};
-
-struct CollapseClauseOps {
-  llvm::SmallVector collapseLowerBound, collapseUpperBound, 
collapseStep;
-};
-
-struct CopyprivateClauseOps {
-  llvm::SmallVector copyprivateVars;
-  llvm::SmallVector copyprivateSyms;
-};
-
-struct CriticalNameClauseOps {
-  StringAttr symName;
-};
-
-struct DependClauseOps {
-  llvm::SmallVector dependKinds;
-  llvm::SmallVector dependVars;
-};
-
-struct DeviceClauseOps {
-  Value device;
-};
-
 struct DeviceTypeClauseOps {
   // The default capture type.
   DeclareTargetDeviceType deviceType = DeclareTargetDeviceType::any;
 };
 
-struct DistScheduleClauseOps {
-  UnitAttr distScheduleStatic;
-  Value distScheduleChunkSize;
-};
-
-struct DoacrossClauseOps {
-  ClauseDependAttr doacrossDependType;
-  IntegerAttr doacrossNumLoops;
-  llvm::SmallVector doacrossDependVars;
-};
-
-struct FilterClauseOps {
-  Value filteredThreadId;
-};
-
-struct FinalClauseOps {
-  Value final;
-};
-
-struct GrainsizeClauseOps {
-  Value grainsize;
-};
-
-struct HasDeviceAddrClauseOps {
-  llvm::SmallVector hasDeviceAddrVars;
-};
-
-struct HintClauseOps {
-  IntegerAttr hint;
-};
-
-struct IfClauseOps {
-  Value ifVar;
-};
-
-struct InReductionClauseOps {
-  llvm::SmallVector inReductionVars;
-  llvm::SmallVector inReductionByref;
-  llvm::SmallVector inReductionSyms;
-};
-
-struct IsDevicePtrClauseOps {
-  llvm::SmallVector isDevicePtrVars;
-};
-
-struct LinearClauseOps {
-  llvm::SmallVector linearVars, linearStepVars;
-};
-
-struct LoopRelatedOps {
-  UnitAttr loopInclusive;
-};
-
-struct MapClauseOps {
-  llvm::SmallVector mapVars;
-};
-
-struct MergeableClauseOps {
-  UnitAttr mergeable;
-};
-
-struct NogroupClauseOps {
-  UnitAttr nogroup;
-};

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)

2024-07-25 Thread Sergio Afonso via llvm-branch-commits


@@ -12,11 +12,52 @@
 
 #include "mlir/TableGen/GenInfo.h"
 
+#include "mlir/TableGen/CodeGenHelpers.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 
 using namespace llvm;
 
+/// The code block defining the base mixin class for combining clause operand
+/// structures.
+static const char *const baseMixinClass = R"(
+namespace detail {
+template 
+struct Clauses : public Mixins... {};
+} // namespace detail
+)";
+
+/// The code block defining operation argument structures.
+static const char *const operationArgStruct = R"(
+using {0}Operands = detail::Clauses<{1}>;
+)";
+
+/// Remove multiple optional prefixes and suffixes from \c str.
+///
+/// Prefixes and suffixes are attempted to be removed once in the order they
+/// appear in the \c prefixes and \c suffixes arguments. All prefixes are
+/// processed before suffixes are. This means it will behave as shown in the
+/// following example:
+///   - str: "PrePreNameSuf1Suf2"
+///   - prefixes: ["Pre"]
+///   - suffixes: ["Suf1", "Suf2"]
+///   - return: "PreNameSuf1"
+static StringRef stripPrefixAndSuffix(StringRef str,
+  llvm::ArrayRef prefixes,
+  llvm::ArrayRef suffixes) {
+  for (StringRef prefix : prefixes)
+if (str.starts_with(prefix))
+  str = str.substr(prefix.size());
+
+  for (StringRef suffix : suffixes)
+if (str.ends_with(suffix))
+  str = str.substr(0, str.size() - suffix.size());

skatrak wrote:

Done.

https://github.com/llvm/llvm-project/pull/99508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)

2024-07-25 Thread Sergio Afonso via llvm-branch-commits


@@ -12,11 +12,52 @@
 
 #include "mlir/TableGen/GenInfo.h"
 
+#include "mlir/TableGen/CodeGenHelpers.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 
 using namespace llvm;
 
+/// The code block defining the base mixin class for combining clause operand
+/// structures.
+static const char *const baseMixinClass = R"(
+namespace detail {
+template 
+struct Clauses : public Mixins... {};
+} // namespace detail
+)";
+
+/// The code block defining operation argument structures.
+static const char *const operationArgStruct = R"(
+using {0}Operands = detail::Clauses<{1}>;
+)";
+
+/// Remove multiple optional prefixes and suffixes from \c str.
+///
+/// Prefixes and suffixes are attempted to be removed once in the order they
+/// appear in the \c prefixes and \c suffixes arguments. All prefixes are
+/// processed before suffixes are. This means it will behave as shown in the
+/// following example:
+///   - str: "PrePreNameSuf1Suf2"
+///   - prefixes: ["Pre"]
+///   - suffixes: ["Suf1", "Suf2"]
+///   - return: "PreNameSuf1"
+static StringRef stripPrefixAndSuffix(StringRef str,
+  llvm::ArrayRef prefixes,
+  llvm::ArrayRef suffixes) {
+  for (StringRef prefix : prefixes)
+if (str.starts_with(prefix))
+  str = str.substr(prefix.size());

skatrak wrote:

Done.

https://github.com/llvm/llvm-project/pull/99508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)

2024-07-25 Thread Sergio Afonso via llvm-branch-commits


@@ -408,17 +408,26 @@ class ElementsAttrBase :
   let storageType = [{ ::mlir::ElementsAttr }];
   let returnType = [{ ::mlir::ElementsAttr }];
   let convertFromStorage = "$_self";
+
+  // The underlying C++ value type of each element.
+  string elementReturnType = ?;

skatrak wrote:

> I'm wary about making this kind of change in a widely shared file. Maybe we 
> could just handle this in OmpOpGen.cpp? Specifically, infer this information 
> in there based on the type of the attribute?

Yes, this is something I tried to avoid as well. The problem is that the only 
existing attribute we could potentially use to get the element type information 
is the `returnType` inherited from `Attr`. We could potentially remove the 
"::llvm::ArrayRef<>" part of that string in the case of `DenseArrayAttrBase` 
and derived types, which doesn't seem like a very clean solution but it would 
work (as long as these subclasses/definitions don't override that property). 
For other subclasses of `ElementsAttrBase` we would have to accept using 
array-style attributes (e.g. `::mlir::DenseIntElementsAttr`) instead of 
lists of elements.

I'd like to avoid hardcoding as many type names as possible in the new tablegen 
backend, since people could just create new general or OpenMP-specific 
attribute types and then it would have to be updated. I think it makes sense to 
specialize it for as few and as generic cases as we can get away with and just 
make sure they already contain the information we need. In this case, we're 
just missing the element type and rank of array attributes, which seems like 
something that could be of general use eventually.

Having said that, this is just the working approach that made the most sense 
to me, but I'm very much interested in discussing potentially better 
alternatives.

> This may need wider support, specifically we may need to generate an accessor 
> function in .h.inc/.cpp.inc.

Good point, I'll delay making this change until we decide whether we want to 
keep these new properties or not.

https://github.com/llvm/llvm-project/pull/99508
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang][headers] Including stddef.h always redefines NULL (#99727) (PR #100191)

2024-07-25 Thread Aaron Ballman via llvm-branch-commits

https://github.com/AaronBallman approved this pull request.

LGTM, I think the CI failures are unrelated to this patch.

https://github.com/llvm/llvm-project/pull/100191
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] cf55425 - Revert "MTM: fix issues after cursory reading (#100404)"

2024-07-25 Thread via llvm-branch-commits

Author: Mehdi Amini
Date: 2024-07-25T14:06:44+02:00
New Revision: cf55425639c201d8b3a80541bc222934485d5eda

URL: 
https://github.com/llvm/llvm-project/commit/cf55425639c201d8b3a80541bc222934485d5eda
DIFF: 
https://github.com/llvm/llvm-project/commit/cf55425639c201d8b3a80541bc222934485d5eda.diff

LOG: Revert "MTM: fix issues after cursory reading (#100404)"

This reverts commit 0760aec54ca6f680f4786c4fc3bbae8f500deeab.

Added: 


Modified: 
llvm/lib/CodeGen/MachineTraceMetrics.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp 
b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index dd1faff355b52..bf3add010574b 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -24,11 +24,17 @@
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include 
+#include 
+#include 
+#include 
+#include 
 
 using namespace llvm;
 
@@ -127,7 +133,7 @@ MachineTraceMetrics::getResources(const MachineBasicBlock 
*MBB) {
 
   // Scale the resource cycles so they are comparable.
   unsigned PROffset = MBB->getNumber() * PRKinds;
-  for (unsigned K = 0; K < PRKinds; ++K)
+  for (unsigned K = 0; K != PRKinds; ++K)
 ProcReleaseAtCycles[PROffset + K] =
   PRCycles[K] * SchedModel.getResourceFactor(K);
 
@@ -140,14 +146,15 @@ MachineTraceMetrics::getProcReleaseAtCycles(unsigned 
MBBNum) const {
  "getResources() must be called before getProcReleaseAtCycles()");
   unsigned PRKinds = SchedModel.getNumProcResourceKinds();
   assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
-  return ArrayRef{ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds};
+  return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
 }
 
 
//===--===//
 // Ensemble utility functions
 
//===--===//
 
-MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *CT) : MTM(*CT) {
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+  : MTM(*ct) {
   BlockInfo.resize(MTM.BlockInfo.size());
   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
   ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
@@ -191,7 +198,7 @@ computeDepthResources(const MachineBasicBlock *MBB) {
   // Compute per-resource depths.
   ArrayRef PredPRDepths = getProcResourceDepths(PredNum);
   ArrayRef PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
-  for (unsigned K = 0; K < PRKinds; ++K)
+  for (unsigned K = 0; K != PRKinds; ++K)
 ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
 }
 
@@ -224,7 +231,7 @@ computeHeightResources(const MachineBasicBlock *MBB) {
 
   // Compute per-resource heights.
   ArrayRef SuccPRHeights = getProcResourceHeights(SuccNum);
-  for (unsigned K = 0; K < PRKinds; ++K)
+  for (unsigned K = 0; K != PRKinds; ++K)
 ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
 }
 
@@ -257,7 +264,7 @@ MachineTraceMetrics::Ensemble::
 getProcResourceDepths(unsigned MBBNum) const {
   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
   assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
-  return ArrayRef{ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds};
+  return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
 }
 
 /// Get an array of processor resource heights for MBB. Indexed by processor
@@ -270,7 +277,7 @@ MachineTraceMetrics::Ensemble::
 getProcResourceHeights(unsigned MBBNum) const {
   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
   assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
-  return ArrayRef{ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds};
+  return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
 }
 
 
//===--===//
@@ -307,8 +314,8 @@ class MinInstrCountEnsemble : public 
MachineTraceMetrics::Ensemble {
   const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
 
 public:
-  MinInstrCountEnsemble(MachineTraceMetrics *MTM)
-  : MachineTraceMetrics::Ensemble(MTM) {}
+  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+: MachineTraceMetrics::Ensemble(mtm) {}
 };
 
 /// Pick only the current basic block for the trace and do not choose any
@@ -388,15 +395,15 @@ MinInstrCountEnsemble::pickTraceSucc(const 
MachineBasicBlock *MBB) {
 
 // Get an Ensemble sub-class for the requested trace strategy.
 MachineTraceMetrics::Ensemble *
-MachineTraceMetrics::getEnsemble(MachineTraceStrat

[llvm-branch-commits] [lldb] b851520 - Revert "[lldb] Fix incorrect uses of logical operator in 'if' condition check…"

2024-07-25 Thread via llvm-branch-commits

Author: David Spickett
Date: 2024-07-25T13:16:55+01:00
New Revision: b85152008f41e8136f5e21db875a63b464f8c10f

URL: 
https://github.com/llvm/llvm-project/commit/b85152008f41e8136f5e21db875a63b464f8c10f
DIFF: 
https://github.com/llvm/llvm-project/commit/b85152008f41e8136f5e21db875a63b464f8c10f.diff

LOG: Revert "[lldb] Fix incorrect uses of logical operator in 'if' condition 
check…"

This reverts commit 2ba3fe7356f065757a2279f65e4ef5c8f1476293.

Added: 


Modified: 

lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h

Removed: 




diff  --git 
a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
 
b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
index 2667f73516ba3..e1a3156d10afd 100644
--- 
a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
+++ 
b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h
@@ -85,15 +85,14 @@ class ScriptedPythonInterface : virtual public 
ScriptedInterface {
 bool has_class_name = !class_name.empty();
 bool has_interpreter_dict =
 !(llvm::StringRef(m_interpreter.GetDictionaryName()).empty());
-
-if (!has_class_name)
-  return create_error("Missing script class name.");
-
-if (!has_interpreter_dict)
-  return create_error("Invalid script interpreter dictionary.");
-
-if (!script_obj)
-  return create_error("Missing scripting object.");
+if (!has_class_name && !has_interpreter_dict && !script_obj) {
+  if (!has_class_name)
+return create_error("Missing script class name.");
+  else if (!has_interpreter_dict)
+return create_error("Invalid script interpreter dictionary.");
+  else
+return create_error("Missing scripting object.");
+}
 
 Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
Locker::FreeLock);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-25 Thread Adrian Vogelsgesang via llvm-branch-commits


@@ -1967,22 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl 
&Clones,
   for (DbgVariableRecord *DVR : DbgVariableRecords)
 coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame,
false /*UseEntryValue*/);
-  return Shape;
-}
 
-/// Remove calls to llvm.coro.end in the original function.
-static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) {
-  if (Shape.ABI != coro::ABI::Switch) {
-for (auto *End : Shape.CoroEnds) {
-  replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr);
-}
-  } else {
-for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) {
-  auto &Context = End->getContext();
-  End->replaceAllUsesWith(ConstantInt::getFalse(Context));
-  End->eraseFromParent();
-}
+  removeCoroEndsFromRampFunction(Shape);
+
+  if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) {

vogelsgesang wrote:

Yes, I would be in favor of adding a second attribute. What do you & others 
think?
CC @ChuanqiXu9 

https://github.com/llvm/llvm-project/pull/99283
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/100587

Backport 98e733e

Requested by: @tblah

>From 6867a167f40aacd653e56a3fab08bc52797f46cd Mon Sep 17 00:00:00 2001
From: Tom Eccles 
Date: Thu, 25 Jul 2024 16:53:27 +0100
Subject: [PATCH] [flang][OpenMP] Initialize privatised derived type variables
 (#100417)

Fixes #91928

(cherry picked from commit 98e733eaf2af1a5c1d9392e279d21182ffdf560d)
---
 flang/include/flang/Lower/ConvertVariable.h   |  8 
 flang/lib/Lower/ConvertVariable.cpp   | 23 -
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  6 +++
 .../Lower/OpenMP/private-derived-type.f90 | 47 +++
 4 files changed, 73 insertions(+), 11 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/private-derived-type.f90

diff --git a/flang/include/flang/Lower/ConvertVariable.h 
b/flang/include/flang/Lower/ConvertVariable.h
index 515f4695951b4..de394a39e112e 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -62,6 +62,14 @@ using AggregateStoreMap = llvm::DenseMap;
 void instantiateVariable(AbstractConverter &, const pft::Variable &var,
  SymMap &symMap, AggregateStoreMap &storeMap);
 
+/// Does this variable have a default initialization?
+bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym);
+
+/// Call default initialization runtime routine to initialize \p var.
+void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
+const Fortran::semantics::Symbol &sym,
+Fortran::lower::SymMap &symMap);
+
 /// Create a fir::GlobalOp given a module variable definition. This is intended
 /// to be used when lowering a module definition, not when lowering variables
 /// used from a module. For used variables instantiateVariable must directly be
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 47ad48fb322cc..4fcfa0b126e04 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -72,7 +72,8 @@ static mlir::Value 
genScalarValue(Fortran::lower::AbstractConverter &converter,
 }
 
 /// Does this variable have a default initialization?
-static bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym) {
+bool Fortran::lower::hasDefaultInitialization(
+const Fortran::semantics::Symbol &sym) {
   if (sym.has() && sym.size())
 if (!Fortran::semantics::IsAllocatableOrPointer(sym))
   if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
@@ -353,7 +354,7 @@ static mlir::Value genComponentDefaultInit(
   // global constructor since this has no runtime cost.
   componentValue = fir::factory::createUnallocatedBox(
   builder, loc, componentTy, std::nullopt);
-} else if (hasDefaultInitialization(component)) {
+} else if (Fortran::lower::hasDefaultInitialization(component)) {
   // Component type has default initialization.
   componentValue = genDefaultInitializerValue(converter, loc, component,
   componentTy, stmtCtx);
@@ -556,7 +557,7 @@ static fir::GlobalOp 
defineGlobal(Fortran::lower::AbstractConverter &converter,
 builder.createConvert(loc, symTy, fir::getBase(initVal));
 builder.create(loc, castTo);
   });
-} else if (hasDefaultInitialization(sym)) {
+} else if (Fortran::lower::hasDefaultInitialization(sym)) {
   Fortran::lower::createGlobalInitialization(
   builder, global, [&](fir::FirOpBuilder &builder) {
 Fortran::lower::StatementContext stmtCtx(
@@ -752,17 +753,15 @@ mustBeDefaultInitializedAtRuntime(const 
Fortran::lower::pft::Variable &var) {
 return true;
   // Local variables (including function results), and intent(out) dummies must
   // be default initialized at runtime if their type has default 
initialization.
-  return hasDefaultInitialization(sym);
+  return Fortran::lower::hasDefaultInitialization(sym);
 }
 
 /// Call default initialization runtime routine to initialize \p var.
-static void
-defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
-   const Fortran::lower::pft::Variable &var,
-   Fortran::lower::SymMap &symMap) {
+void Fortran::lower::defaultInitializeAtRuntime(
+Fortran::lower::AbstractConverter &converter,
+const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.getCurrentLocation();
-  const Fortran::semantics::Symbol &sym = var.getSymbol();
   fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
   if (Fortran::semantics::IsOptional(sym)) {
 // 15.5.2.12 point 3, absent optional dummies are not initialized.
@@ -927,7 +926,8 @@ static void 
instantiateLocal(Fortran

[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/100587
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:

@jeanPerier What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/100587
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: None (llvmbot)


Changes

Backport 98e733e

Requested by: @tblah

---
Full diff: https://github.com/llvm/llvm-project/pull/100587.diff


4 Files Affected:

- (modified) flang/include/flang/Lower/ConvertVariable.h (+8) 
- (modified) flang/lib/Lower/ConvertVariable.cpp (+12-11) 
- (modified) flang/lib/Lower/OpenMP/DataSharingProcessor.cpp (+6) 
- (added) flang/test/Lower/OpenMP/private-derived-type.f90 (+47) 


``diff
diff --git a/flang/include/flang/Lower/ConvertVariable.h 
b/flang/include/flang/Lower/ConvertVariable.h
index 515f4695951b4..de394a39e112e 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -62,6 +62,14 @@ using AggregateStoreMap = llvm::DenseMap;
 void instantiateVariable(AbstractConverter &, const pft::Variable &var,
  SymMap &symMap, AggregateStoreMap &storeMap);
 
+/// Does this variable have a default initialization?
+bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym);
+
+/// Call default initialization runtime routine to initialize \p var.
+void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
+const Fortran::semantics::Symbol &sym,
+Fortran::lower::SymMap &symMap);
+
 /// Create a fir::GlobalOp given a module variable definition. This is intended
 /// to be used when lowering a module definition, not when lowering variables
 /// used from a module. For used variables instantiateVariable must directly be
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 47ad48fb322cc..4fcfa0b126e04 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -72,7 +72,8 @@ static mlir::Value 
genScalarValue(Fortran::lower::AbstractConverter &converter,
 }
 
 /// Does this variable have a default initialization?
-static bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym) {
+bool Fortran::lower::hasDefaultInitialization(
+const Fortran::semantics::Symbol &sym) {
   if (sym.has() && sym.size())
 if (!Fortran::semantics::IsAllocatableOrPointer(sym))
   if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
@@ -353,7 +354,7 @@ static mlir::Value genComponentDefaultInit(
   // global constructor since this has no runtime cost.
   componentValue = fir::factory::createUnallocatedBox(
   builder, loc, componentTy, std::nullopt);
-} else if (hasDefaultInitialization(component)) {
+} else if (Fortran::lower::hasDefaultInitialization(component)) {
   // Component type has default initialization.
   componentValue = genDefaultInitializerValue(converter, loc, component,
   componentTy, stmtCtx);
@@ -556,7 +557,7 @@ static fir::GlobalOp 
defineGlobal(Fortran::lower::AbstractConverter &converter,
 builder.createConvert(loc, symTy, fir::getBase(initVal));
 builder.create(loc, castTo);
   });
-} else if (hasDefaultInitialization(sym)) {
+} else if (Fortran::lower::hasDefaultInitialization(sym)) {
   Fortran::lower::createGlobalInitialization(
   builder, global, [&](fir::FirOpBuilder &builder) {
 Fortran::lower::StatementContext stmtCtx(
@@ -752,17 +753,15 @@ mustBeDefaultInitializedAtRuntime(const 
Fortran::lower::pft::Variable &var) {
 return true;
   // Local variables (including function results), and intent(out) dummies must
   // be default initialized at runtime if their type has default 
initialization.
-  return hasDefaultInitialization(sym);
+  return Fortran::lower::hasDefaultInitialization(sym);
 }
 
 /// Call default initialization runtime routine to initialize \p var.
-static void
-defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
-   const Fortran::lower::pft::Variable &var,
-   Fortran::lower::SymMap &symMap) {
+void Fortran::lower::defaultInitializeAtRuntime(
+Fortran::lower::AbstractConverter &converter,
+const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.getCurrentLocation();
-  const Fortran::semantics::Symbol &sym = var.getSymbol();
   fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
   if (Fortran::semantics::IsOptional(sym)) {
 // 15.5.2.12 point 3, absent optional dummies are not initialized.
@@ -927,7 +926,8 @@ static void 
instantiateLocal(Fortran::lower::AbstractConverter &converter,
   if (needDummyIntentoutFinalization(var))
 finalizeAtRuntime(converter, var, symMap);
   if (mustBeDefaultInitializedAtRuntime(var))
-defaultInitializeAtRuntime(converter, var, symMap);
+Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),

[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/100590
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/100590

Backport 3295d377f37a60597321f502d164b5d6b1948e28

Requested by: @Endilll

>From 3c7695ec2681c3ca531b5ce2a2fd20b8301df0b5 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov 
Date: Thu, 25 Jul 2024 20:15:14 +0400
Subject: [PATCH] [clang] Remove `__is_layout_compatible` from revertible type
 traits list (#100572)

`__is_layout_compatible` was added in Clang 19 (#81506), and at that
time it wasn't entirely clear whether it should be a revertible type
trait or not. We decided to follow the example of other type traits.
Since then #95969 happened, and now we know that we don't want new
revertible type traits.

This patch removes `__is_layout_compatible` from the list of revertible type
traits, and leaves a comment explaining what revertible type traits are and
that new type traits should not be added there.

The intention is to also cherry-pick this to 19 branch.

(cherry picked from commit 3295d377f37a60597321f502d164b5d6b1948e28)
---
 clang/lib/Parse/ParseExpr.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 0a017ae79de75..e82b565272831 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -763,6 +763,9 @@ class CastExpressionIdValidator final : public 
CorrectionCandidateCallback {
 bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
tok::TokenKind *Kind) {
   if (RevertibleTypeTraits.empty()) {
+// Revertible type trait is a feature for backwards compatibility with older
+// standard libraries that declare their own structs with the same name as
+// the builtins listed below. New builtins should NOT be added to this list.
 #define RTT_JOIN(X, Y) X##Y
 #define REVERTIBLE_TYPE_TRAIT(Name)
\
   RevertibleTypeTraits[PP.getIdentifierInfo(#Name)] = RTT_JOIN(tok::kw_, Name)
@@ -790,7 +793,6 @@ bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
 REVERTIBLE_TYPE_TRAIT(__is_fundamental);
 REVERTIBLE_TYPE_TRAIT(__is_integral);
 REVERTIBLE_TYPE_TRAIT(__is_interface_class);
-REVERTIBLE_TYPE_TRAIT(__is_layout_compatible);
 REVERTIBLE_TYPE_TRAIT(__is_literal);
 REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr);
 REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:

@cor3ntin What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/100590
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (llvmbot)


Changes

Backport 3295d377f37a60597321f502d164b5d6b1948e28

Requested by: @Endilll

---
Full diff: https://github.com/llvm/llvm-project/pull/100590.diff


1 Files Affected:

- (modified) clang/lib/Parse/ParseExpr.cpp (+3-1) 


``diff
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 0a017ae79de75..e82b565272831 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -763,6 +763,9 @@ class CastExpressionIdValidator final : public 
CorrectionCandidateCallback {
 bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
tok::TokenKind *Kind) {
   if (RevertibleTypeTraits.empty()) {
+// Revertible type trait is a feature for backwards compatibility with older
+// standard libraries that declare their own structs with the same name as
+// the builtins listed below. New builtins should NOT be added to this list.
 #define RTT_JOIN(X, Y) X##Y
 #define REVERTIBLE_TYPE_TRAIT(Name)
\
   RevertibleTypeTraits[PP.getIdentifierInfo(#Name)] = RTT_JOIN(tok::kw_, Name)
@@ -790,7 +793,6 @@ bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II,
 REVERTIBLE_TYPE_TRAIT(__is_fundamental);
 REVERTIBLE_TYPE_TRAIT(__is_integral);
 REVERTIBLE_TYPE_TRAIT(__is_interface_class);
-REVERTIBLE_TYPE_TRAIT(__is_layout_compatible);
 REVERTIBLE_TYPE_TRAIT(__is_literal);
 REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr);
 REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference);

``




https://github.com/llvm/llvm-project/pull/100590
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)

2024-07-25 Thread via llvm-branch-commits

https://github.com/cor3ntin approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/100590
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Shaw Young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/99891

>From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 22 Jul 2024 11:56:23 -0700
Subject: [PATCH 01/22] Changed assignment of profiles with pseudo probe index

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 85 +++
 .../X86/match-blocks-with-pseudo-probes.test  | 25 ++
 2 files changed, 78 insertions(+), 32 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp 
b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4105f626fb5b6..c135ee5ff4837 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -195,11 +195,15 @@ class StaleMatcher {
   void init(const std::vector &Blocks,
 const std::vector &Hashes,
 const std::vector &CallHashes,
-std::optional YamlBFGUID) {
+const std::unordered_map>
+IndexToBinaryPseudoProbes,
+const std::unordered_map
+BinaryPseudoProbeToBlock,
+const uint64_t YamlBFGUID) {
 assert(Blocks.size() == Hashes.size() &&
Hashes.size() == CallHashes.size() &&
"incorrect matcher initialization");
-
 for (size_t I = 0; I < Blocks.size(); I++) {
   FlowBlock *Block = Blocks[I];
   uint16_t OpHash = Hashes[I].OpcodeHash;
@@ -209,6 +213,8 @@ class StaleMatcher {
 std::make_pair(Hashes[I], Block));
   this->Blocks.push_back(Block);
 }
+this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes;
+this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock;
 this->YamlBFGUID = YamlBFGUID;
   }
 
@@ -234,10 +240,14 @@ class StaleMatcher {
   using HashBlockPairType = std::pair;
   std::unordered_map> OpHashToBlocks;
   std::unordered_map> 
CallHashToBlocks;
-  std::vector Blocks;
+  std::unordered_map>
+  IndexToBinaryPseudoProbes;
+  std::unordered_map
+  BinaryPseudoProbeToBlock;
+  std::vector Blocks;
   // If the pseudo probe checksums of the profiled and binary functions are
   // equal, then the YamlBF's GUID is defined and used to match blocks.
-  std::optional YamlBFGUID;
+  uint64_t YamlBFGUID;
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -284,7 +294,7 @@ class StaleMatcher {
 // Searches for the pseudo probe attached to the matched function's block,
 // ignoring pseudo probes attached to function calls and inlined functions'
 // blocks.
-outs() << "match with pseudo probes\n";
+std::vector BlockPseudoProbes;
 for (const auto &PseudoProbe : PseudoProbes) {
   // Ensures that pseudo probe information belongs to the appropriate
   // function and not an inlined function.
@@ -293,11 +303,30 @@ class StaleMatcher {
   // Skips pseudo probes attached to function calls.
   if (PseudoProbe.Type != static_cast(PseudoProbeType::Block))
 continue;
-  assert(PseudoProbe.Index < Blocks.size() &&
- "pseudo probe index out of range");
-  return Blocks[PseudoProbe.Index];
+
+  BlockPseudoProbes.push_back(&PseudoProbe);
 }
-return nullptr;
+
+// Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
+// probe and binary pseudo probe.
+if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1)
+  return nullptr;
+
+uint64_t Index = BlockPseudoProbes[0]->Index;
+assert(Index < Blocks.size() && "Invalid pseudo probe index");
+
+auto It = IndexToBinaryPseudoProbes.find(Index);
+assert(It != IndexToBinaryPseudoProbes.end() &&
+   "All blocks should have a pseudo probe");
+if (It->second.size() > 1)
+  return nullptr;
+
+const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
+auto BinaryPseudoProbeIt = 
BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
+assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
+   "All binary pseudo probes should belong a binary basic block");
+
+return BinaryPseudoProbeIt->second;
   }
 };
 
@@ -491,6 +520,11 @@ size_t matchWeightsByHashes(
   std::vector CallHashes;
   std::vector Blocks;
   std::vector BlendedHashes;
+  std::unordered_map>
+  IndexToBinaryPseudoProbes;
+  std::unordered_map
+  BinaryPseudoProbeToBlock;
+  const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
 const BinaryBasicBlock *BB = BlockOrder[I];
 assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
@@ -510,9 +544,27 @@ size_t matchWeightsByHashes(
 Blocks.push_back(&Func.Blocks[I + 1]);
 BlendedBlockHash BlendedHash(BB->getHash());
 BlendedHashes.push_back(BlendedHash);
+if (PseudoProbeDecoder) {
+  const AddressProbesMap &ProbeMap =
+  PseudoProbeDecoder->get

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/100604

Backport a55df237375e98cfc2520d5eb1a23b302ef02ba0

Requested by: @ldionne

>From 342755c855bbc6873b4677a74812759c6a80cdae Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Thu, 25 Jul 2024 12:16:48 -0500
Subject: [PATCH] [libc++] Add missing xlocale.h include on Apple and FreeBSD
 (#99689)

The `<locale>` header uses `strtoll_l` and friends, which are defined in
`<xlocale.h>` on these platforms. While this works via transitive
includes when modules are disabled, this doesn't work anymore if the
platforms are modularized properly.

(cherry picked from commit a55df237375e98cfc2520d5eb1a23b302ef02ba0)
---
 libcxx/include/locale | 4 
 1 file changed, 4 insertions(+)

diff --git a/libcxx/include/locale b/libcxx/include/locale
index dbec23a2c936d..573910a85bef5 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -232,6 +232,10 @@ template  class messages_byname;
 #include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
 #  endif
 
+#  if defined(__APPLE__) || defined(__FreeBSD__)
+#include <xlocale.h>
+#  endif
+
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
 #  endif

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/100604
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:

@philnik777 What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/100604
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-libcxx

Author: None (llvmbot)


Changes

Backport a55df237375e98cfc2520d5eb1a23b302ef02ba0

Requested by: @ldionne

---
Full diff: https://github.com/llvm/llvm-project/pull/100604.diff


1 Files Affected:

- (modified) libcxx/include/locale (+4) 


``diff
diff --git a/libcxx/include/locale b/libcxx/include/locale
index dbec23a2c936d..573910a85bef5 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -232,6 +232,10 @@ template  class messages_byname;
 #include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
 #  endif
 
+#  if defined(__APPLE__) || defined(__FreeBSD__)
+#include <xlocale.h>
+#  endif
+
 #  if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
 #  endif

``




https://github.com/llvm/llvm-project/pull/100604
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100522

>From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:47:03 +0400
Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics

---
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 +
 1 file changed, 310 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..f65615b07abc0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+define void @abs_nonpoison() {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = 
call i8 @llvm.abs.i8(i8 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 
= call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I8 
= call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100522

>From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:47:03 +0400
Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics

---
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 +
 1 file changed, 310 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0..f65615b07abc0
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SLOW-SIZE %s
+; END.
+
+define void @abs_nonpoison() {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 
= call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: 
%V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: 
%V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = 
call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 
= call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: 
%V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: 
%V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: 
%V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = 
call i8 @llvm.abs.i8(i8 undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 
= call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I8 
= call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4

[llvm-branch-commits] [llvm] TTI: Fix special casing vectorization costs of saturating add/sub (PR #97463)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/97463
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100514
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100519

>From 5a2e8acf2b7e4aafae237a035f81557d97948a29 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  67 +-
 .../Analysis/CostModel/X86/arith-overflow.ll  | 120 +-
 2 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index c5da46af04367..28d53042d4c21 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 f

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100520

>From 39ca2c43676bf82f97f8cce2e09091e7d849dfab Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 85c14e04d3e27c8609fac2890eb475963d7f008b Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h   | 32 +
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++---
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..e290f0631ff16 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -24,11 +24,11 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = 
call i16 @llvm.abs.i16(i16 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 
= call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: 
%V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 
= call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 
= call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 

[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/100523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Shaw Young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/99891

>From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 22 Jul 2024 11:56:23 -0700
Subject: [PATCH 01/23] Changed assignment of profiles with pseudo probe index

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 85 +++
 .../X86/match-blocks-with-pseudo-probes.test  | 25 ++
 2 files changed, 78 insertions(+), 32 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp 
b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4105f626fb5b6..c135ee5ff4837 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -195,11 +195,15 @@ class StaleMatcher {
   void init(const std::vector &Blocks,
 const std::vector &Hashes,
 const std::vector &CallHashes,
-std::optional YamlBFGUID) {
+const std::unordered_map>
+IndexToBinaryPseudoProbes,
+const std::unordered_map
+BinaryPseudoProbeToBlock,
+const uint64_t YamlBFGUID) {
 assert(Blocks.size() == Hashes.size() &&
Hashes.size() == CallHashes.size() &&
"incorrect matcher initialization");
-
 for (size_t I = 0; I < Blocks.size(); I++) {
   FlowBlock *Block = Blocks[I];
   uint16_t OpHash = Hashes[I].OpcodeHash;
@@ -209,6 +213,8 @@ class StaleMatcher {
 std::make_pair(Hashes[I], Block));
   this->Blocks.push_back(Block);
 }
+this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes;
+this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock;
 this->YamlBFGUID = YamlBFGUID;
   }
 
@@ -234,10 +240,14 @@ class StaleMatcher {
   using HashBlockPairType = std::pair;
   std::unordered_map> OpHashToBlocks;
   std::unordered_map> 
CallHashToBlocks;
-  std::vector Blocks;
+  std::unordered_map>
+  IndexToBinaryPseudoProbes;
+  std::unordered_map
+  BinaryPseudoProbeToBlock;
+  std::vector Blocks;
   // If the pseudo probe checksums of the profiled and binary functions are
   // equal, then the YamlBF's GUID is defined and used to match blocks.
-  std::optional YamlBFGUID;
+  uint64_t YamlBFGUID;
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -284,7 +294,7 @@ class StaleMatcher {
 // Searches for the pseudo probe attached to the matched function's block,
 // ignoring pseudo probes attached to function calls and inlined functions'
 // blocks.
-outs() << "match with pseudo probes\n";
+std::vector BlockPseudoProbes;
 for (const auto &PseudoProbe : PseudoProbes) {
   // Ensures that pseudo probe information belongs to the appropriate
   // function and not an inlined function.
@@ -293,11 +303,30 @@ class StaleMatcher {
   // Skips pseudo probes attached to function calls.
   if (PseudoProbe.Type != static_cast(PseudoProbeType::Block))
 continue;
-  assert(PseudoProbe.Index < Blocks.size() &&
- "pseudo probe index out of range");
-  return Blocks[PseudoProbe.Index];
+
+  BlockPseudoProbes.push_back(&PseudoProbe);
 }
-return nullptr;
+
+// Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
+// probe and binary pseudo probe.
+if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1)
+  return nullptr;
+
+uint64_t Index = BlockPseudoProbes[0]->Index;
+assert(Index < Blocks.size() && "Invalid pseudo probe index");
+
+auto It = IndexToBinaryPseudoProbes.find(Index);
+assert(It != IndexToBinaryPseudoProbes.end() &&
+   "All blocks should have a pseudo probe");
+if (It->second.size() > 1)
+  return nullptr;
+
+const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
+auto BinaryPseudoProbeIt = 
BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
+assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
+   "All binary pseudo probes should belong a binary basic block");
+
+return BinaryPseudoProbeIt->second;
   }
 };
 
@@ -491,6 +520,11 @@ size_t matchWeightsByHashes(
   std::vector CallHashes;
   std::vector Blocks;
   std::vector BlendedHashes;
+  std::unordered_map>
+  IndexToBinaryPseudoProbes;
+  std::unordered_map
+  BinaryPseudoProbeToBlock;
+  const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
 const BinaryBasicBlock *BB = BlockOrder[I];
 assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
@@ -510,9 +544,27 @@ size_t matchWeightsByHashes(
 Blocks.push_back(&Func.Blocks[I + 1]);
 BlendedBlockHash BlendedHash(BB->getHash());
 BlendedHashes.push_back(BlendedHash);
+if (PseudoProbeDecoder) {
+  const AddressProbesMap &ProbeMap =
+  PseudoProbeDecoder->get

[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Shaw Young via llvm-branch-commits


@@ -478,10 +605,31 @@ matchWeightsByHashes(BinaryContext &BC,
 Blocks.push_back(&Func.Blocks[I + 1]);
 BlendedBlockHash BlendedHash(BB->getHash());
 BlendedHashes.push_back(BlendedHash);
+// Collects pseudo probes attached to the BB for use in the StaleMatcher.
+if (opts::ProfileUsePseudoProbes && PseudoProbeDecoder) {
+  const AddressProbesMap &ProbeMap =
+  PseudoProbeDecoder->getAddress2ProbesMap();
+  const uint64_t FuncAddr = BF.getAddress();
+  const std::pair &BlockRange =
+  BB->getInputAddressRange();
+  const auto &BlockProbes =
+  llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
+   ProbeMap.lower_bound(FuncAddr + BlockRange.second));
+  for (const auto &[_, Probes] : BlockProbes) {
+for (const MCDecodedPseudoProbe &Probe : Probes) {
+  if (Probe.getInlineTreeNode()->hasInlineSite())

shawbyoung wrote:

Just added inlined block pseudo probe matching.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 949edfeeecddb315bf95dd82be99c57a4711c30a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

Also adjust the AMDGPU cost.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  32 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   9 +-
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +-
 3 files changed, 210 insertions(+), 199 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..8ae236850b982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+  case Intrinsic::abs:
 return true;
   default:
 return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && SLT == MVT::f16) ||
+  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+// TODO: Full rate for i32/i16
 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
   NElts = 1;
 break;
+  case Intrinsic::abs:
+// Expansion takes 2 instructions for VALU
+if (SLT == MVT::i16 || SLT == MVT::i32)
+  InstRate = 2 * getFullRateInstrCost();
+break;
   }
 
   return LT.first * NElts * InstRate;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..b86e99558377b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -14,116 +14,116 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instructi

[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)

2024-07-25 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 approved this pull request.


https://github.com/llvm/llvm-project/pull/100604
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Lei Wang via llvm-branch-commits


@@ -478,10 +675,34 @@ matchWeightsByHashes(BinaryContext &BC,
 Blocks.push_back(&Func.Blocks[I + 1]);
 BlendedBlockHash BlendedHash(BB->getHash());
 BlendedHashes.push_back(BlendedHash);
+// Collects pseudo probes attached to the BB for use in the StaleMatcher.
+if (opts::ProfileUsePseudoProbes &&
+opts::StaleMatchingWithBlockPseudoProbes && PseudoProbeDecoder) {
+  const AddressProbesMap &ProbeMap =
+  PseudoProbeDecoder->getAddress2ProbesMap();
+  const uint64_t FuncAddr = BF.getAddress();
+  const std::pair &BlockRange =
+  BB->getInputAddressRange();
+  const auto &BlockProbes =
+  llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
+   ProbeMap.lower_bound(FuncAddr + BlockRange.second));
+  for (const auto &[_, Probes] : BlockProbes) {
+for (const MCDecodedPseudoProbe &Probe : Probes) {
+  if (Probe.getType() != static_cast(PseudoProbeType::Block))
+continue;
+  if (Probe.getInlineTreeNode()->hasInlineSite())
+Matcher.mapGUIDAndIndexToProbe(Probe.getGuid(), Probe.getIndex(),
+   &Probe);
+  else
+Matcher.mapIndexToProbe(Probe.getIndex(), &Probe);

wlei-llvm wrote:

Why do we need two containers here? IIUC, every probe contains the 
`GUID`, and the top-level probes should contain the function's GUID.

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Lei Wang via llvm-branch-commits


@@ -0,0 +1,62 @@
+## Tests stale block matching with pseudo probes.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
+# RUN:   --print-cfg --funcs=main --profile-ignore-hash=0 
--infer-stale-profile --profile-use-pseudo-probes 
--stale-matching-with-block-pseudo-probes 2>&1 | FileCheck %s
+
+# CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic 
blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale)
+
+#--- main.s
+ .text
+  .globl  main# -- Begin function main
+  .p2align4, 0x90
+  .type   main,@function
+main:   # @main
+# %bb.0:
+  pushq   %rbp
+  movq%rsp, %rbp
+  movl$0, -4(%rbp)
+  .pseudoprobe15822663052811949562 1 0 0 main

wlei-llvm wrote:

Consider adding an inlining case? (I guess one big reason we want to use 
pseudo-probes is to deal with inlining.)

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Lei Wang via llvm-branch-commits


@@ -116,6 +118,11 @@ cl::opt StaleMatchingCostJumpUnknownFTInc(
 "The cost of increasing an unknown fall-through jump count by one."),
 cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
+cl::opt StaleMatchingWithBlockPseudoProbes(
+"stale-matching-with-block-pseudo-probes",
+cl::desc("Turns on stale matching with block pseudo probes."), cl::init(3),

wlei-llvm wrote:

`cl::init(3)`: should this be a bool value?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Lei Wang via llvm-branch-commits


@@ -266,6 +325,123 @@ class StaleMatcher {
 }
 return BestBlock;
   }
+
+  /// A helper function for logging.
+  static bool LogErrIfExpr(bool Expr, std::string Message) {

wlei-llvm wrote:

Nit: can this `std::string` be a `StringRef`?

https://github.com/llvm/llvm-project/pull/99891
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)

2024-07-25 Thread Serge Pavlov via llvm-branch-commits

spavloff wrote:

Just as with #100378, the changes in the tests demonstrate that the produced code 
becomes worse. In which cases does this patch make an improvement? Can it be 
limited to such cases?

https://github.com/llvm/llvm-project/pull/100380
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)

2024-07-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner created 
https://github.com/llvm/llvm-project/pull/100622

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner created 
https://github.com/llvm/llvm-project/pull/100623

This makes the binding structure in a DXILResource default to empty
and need a separate call to set up, and also moves the unique ID into
it since bindings are the only place where those are actually used.

This will put us in a better position when dealing with resource
handles in libraries.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Justin Bogner (bogner)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/100622.diff


2 Files Affected:

- (modified) llvm/include/llvm/Analysis/DXILResource.h (+2-1) 
- (modified) llvm/lib/Analysis/DXILResource.cpp (+1) 


``diff
diff --git a/llvm/include/llvm/Analysis/DXILResource.h 
b/llvm/include/llvm/Analysis/DXILResource.h
index ac1cefd98dbe3..cca5e0f0bd759 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -9,11 +9,12 @@
 #ifndef LLVM_ANALYSIS_DXILRESOURCE_H
 #define LLVM_ANALYSIS_DXILRESOURCE_H
 
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/DXILABI.h"
 
 namespace llvm {
+class MDTuple;
+
 namespace dxil {
 
 struct ResourceBinding {
diff --git a/llvm/lib/Analysis/DXILResource.cpp 
b/llvm/lib/Analysis/DXILResource.cpp
index cbe634c4b91aa..d47a73c05a3e5 100644
--- a/llvm/lib/Analysis/DXILResource.cpp
+++ b/llvm/lib/Analysis/DXILResource.cpp
@@ -9,6 +9,7 @@
 #include "llvm/Analysis/DXILResource.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Metadata.h"
 
 using namespace llvm;
 using namespace dxil;

``




https://github.com/llvm/llvm-project/pull/100622
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Justin Bogner (bogner)


Changes

This makes the binding structure in a DXILResource default to empty
and need a separate call to set up, and also moves the unique ID into
it since bindings are the only place where those are actually used.

This will put us in a better position when dealing with resource
handles in libraries.


---

Patch is 27.00 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100623.diff


3 Files Affected:

- (modified) llvm/include/llvm/Analysis/DXILResource.h (+41-47) 
- (modified) llvm/lib/Analysis/DXILResource.cpp (+35-57) 
- (modified) llvm/unittests/Analysis/DXILResourceTest.cpp (+39-39) 


``diff
diff --git a/llvm/include/llvm/Analysis/DXILResource.h 
b/llvm/include/llvm/Analysis/DXILResource.h
index cca5e0f0bd759..d4006ae10837c 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -17,19 +17,22 @@ class MDTuple;
 
 namespace dxil {
 
-struct ResourceBinding {
-  uint32_t Space;
-  uint32_t LowerBound;
-  uint32_t Size;
-
-  bool operator==(const ResourceBinding &RHS) const {
-return std::tie(Space, LowerBound, Size) ==
-   std::tie(RHS.Space, RHS.LowerBound, RHS.Size);
-  }
-  bool operator!=(const ResourceBinding &RHS) const { return !(*this == RHS); }
-};
-
 class ResourceInfo {
+  struct ResourceBinding {
+uint32_t UniqueID;
+uint32_t Space;
+uint32_t LowerBound;
+uint32_t Size;
+
+bool operator==(const ResourceBinding &RHS) const {
+  return std::tie(UniqueID, Space, LowerBound, Size) ==
+ std::tie(RHS.UniqueID, RHS.Space, RHS.LowerBound, RHS.Size);
+}
+bool operator!=(const ResourceBinding &RHS) const {
+  return !(*this == RHS);
+}
+  };
+
   struct UAVInfo {
 bool GloballyCoherent;
 bool HasCounter;
@@ -81,12 +84,11 @@ class ResourceInfo {
   Value *Symbol;
   StringRef Name;
 
-  ResourceBinding Binding;
-  uint32_t UniqueID;
-
   dxil::ResourceClass RC;
   dxil::ResourceKind Kind;
 
+  ResourceBinding Binding = {};
+
   // Resource class dependent properties.
   // CBuffer, Sampler, and RawBuffer end here.
   union {
@@ -114,70 +116,62 @@ class ResourceInfo {
   bool isMultiSample() const;
 
   ResourceInfo(dxil::ResourceClass RC, dxil::ResourceKind Kind, Value *Symbol,
-   StringRef Name, ResourceBinding Binding, uint32_t UniqueID)
-  : Symbol(Symbol), Name(Name), Binding(Binding), UniqueID(UniqueID),
-RC(RC), Kind(Kind) {}
+   StringRef Name)
+  : Symbol(Symbol), Name(Name), RC(RC), Kind(Kind) {}
 
 public:
   static ResourceInfo SRV(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   dxil::ElementType ElementTy, uint32_t ElementCount,
   dxil::ResourceKind Kind);
-  static ResourceInfo RawBuffer(Value *Symbol, StringRef Name,
-ResourceBinding Binding, uint32_t UniqueID);
+  static ResourceInfo RawBuffer(Value *Symbol, StringRef Name);
   static ResourceInfo StructuredBuffer(Value *Symbol, StringRef Name,
-   ResourceBinding Binding,
-   uint32_t UniqueID, uint32_t Stride,
-   Align Alignment);
+   uint32_t Stride, Align Alignment);
   static ResourceInfo Texture2DMS(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   dxil::ElementType ElementTy,
   uint32_t ElementCount, uint32_t SampleCount);
-  static ResourceInfo
-  Texture2DMSArray(Value *Symbol, StringRef Name, ResourceBinding Binding,
-   uint32_t UniqueID, dxil::ElementType ElementTy,
-   uint32_t ElementCount, uint32_t SampleCount);
+  static ResourceInfo Texture2DMSArray(Value *Symbol, StringRef Name,
+   dxil::ElementType ElementTy,
+   uint32_t ElementCount,
+   uint32_t SampleCount);
 
   static ResourceInfo UAV(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   dxil::ElementType ElementTy, uint32_t ElementCount,
   bool GloballyCoherent, bool IsROV,
   dxil::ResourceKind Kind);
   static ResourceInfo RWRawBuffer(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   bool GloballyCoherent, bool IsROV);
   static ResourceInfo RWStructuredBuffer(Value *Symbol, StringRef Name,
- ResourceBinding Binding,
- uint32

[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: Justin Bogner (bogner)


Changes

This makes the binding structure in a DXILResource default to empty
and need a separate call to set up, and also moves the unique ID into
it since bindings are the only place where those are actually used.

This will put us in a better position when dealing with resource
handles in libraries.


---

Patch is 27.00 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/100623.diff


3 Files Affected:

- (modified) llvm/include/llvm/Analysis/DXILResource.h (+41-47) 
- (modified) llvm/lib/Analysis/DXILResource.cpp (+35-57) 
- (modified) llvm/unittests/Analysis/DXILResourceTest.cpp (+39-39) 


``diff
diff --git a/llvm/include/llvm/Analysis/DXILResource.h 
b/llvm/include/llvm/Analysis/DXILResource.h
index cca5e0f0bd759..d4006ae10837c 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -17,19 +17,22 @@ class MDTuple;
 
 namespace dxil {
 
-struct ResourceBinding {
-  uint32_t Space;
-  uint32_t LowerBound;
-  uint32_t Size;
-
-  bool operator==(const ResourceBinding &RHS) const {
-return std::tie(Space, LowerBound, Size) ==
-   std::tie(RHS.Space, RHS.LowerBound, RHS.Size);
-  }
-  bool operator!=(const ResourceBinding &RHS) const { return !(*this == RHS); }
-};
-
 class ResourceInfo {
+  struct ResourceBinding {
+uint32_t UniqueID;
+uint32_t Space;
+uint32_t LowerBound;
+uint32_t Size;
+
+bool operator==(const ResourceBinding &RHS) const {
+  return std::tie(UniqueID, Space, LowerBound, Size) ==
+ std::tie(RHS.UniqueID, RHS.Space, RHS.LowerBound, RHS.Size);
+}
+bool operator!=(const ResourceBinding &RHS) const {
+  return !(*this == RHS);
+}
+  };
+
   struct UAVInfo {
 bool GloballyCoherent;
 bool HasCounter;
@@ -81,12 +84,11 @@ class ResourceInfo {
   Value *Symbol;
   StringRef Name;
 
-  ResourceBinding Binding;
-  uint32_t UniqueID;
-
   dxil::ResourceClass RC;
   dxil::ResourceKind Kind;
 
+  ResourceBinding Binding = {};
+
   // Resource class dependent properties.
   // CBuffer, Sampler, and RawBuffer end here.
   union {
@@ -114,70 +116,62 @@ class ResourceInfo {
   bool isMultiSample() const;
 
   ResourceInfo(dxil::ResourceClass RC, dxil::ResourceKind Kind, Value *Symbol,
-   StringRef Name, ResourceBinding Binding, uint32_t UniqueID)
-  : Symbol(Symbol), Name(Name), Binding(Binding), UniqueID(UniqueID),
-RC(RC), Kind(Kind) {}
+   StringRef Name)
+  : Symbol(Symbol), Name(Name), RC(RC), Kind(Kind) {}
 
 public:
   static ResourceInfo SRV(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   dxil::ElementType ElementTy, uint32_t ElementCount,
   dxil::ResourceKind Kind);
-  static ResourceInfo RawBuffer(Value *Symbol, StringRef Name,
-ResourceBinding Binding, uint32_t UniqueID);
+  static ResourceInfo RawBuffer(Value *Symbol, StringRef Name);
   static ResourceInfo StructuredBuffer(Value *Symbol, StringRef Name,
-   ResourceBinding Binding,
-   uint32_t UniqueID, uint32_t Stride,
-   Align Alignment);
+   uint32_t Stride, Align Alignment);
   static ResourceInfo Texture2DMS(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   dxil::ElementType ElementTy,
   uint32_t ElementCount, uint32_t SampleCount);
-  static ResourceInfo
-  Texture2DMSArray(Value *Symbol, StringRef Name, ResourceBinding Binding,
-   uint32_t UniqueID, dxil::ElementType ElementTy,
-   uint32_t ElementCount, uint32_t SampleCount);
+  static ResourceInfo Texture2DMSArray(Value *Symbol, StringRef Name,
+   dxil::ElementType ElementTy,
+   uint32_t ElementCount,
+   uint32_t SampleCount);
 
   static ResourceInfo UAV(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   dxil::ElementType ElementTy, uint32_t ElementCount,
   bool GloballyCoherent, bool IsROV,
   dxil::ResourceKind Kind);
   static ResourceInfo RWRawBuffer(Value *Symbol, StringRef Name,
-  ResourceBinding Binding, uint32_t UniqueID,
   bool GloballyCoherent, bool IsROV);
   static ResourceInfo RWStructuredBuffer(Value *Symbol, StringRef Name,
- ResourceBinding Binding,
- uint

[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)

2024-07-25 Thread Xiang Li via llvm-branch-commits

https://github.com/python3kgae approved this pull request.


https://github.com/llvm/llvm-project/pull/100622
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)

2024-07-25 Thread Joshua Batista via llvm-branch-commits

https://github.com/bob80905 approved this pull request.


https://github.com/llvm/llvm-project/pull/100622
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)

2024-07-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/100622


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)

2024-07-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/100622


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/100623


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner updated 
https://github.com/llvm/llvm-project/pull/100623


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread Joshua Batista via llvm-branch-commits

https://github.com/bob80905 approved this pull request.

LGTM, makes sense. Weird how GitHub highlights `UniqueID`; maybe it's some sort 
of special keyword? It might be worth renaming, but that's a very small nit.

https://github.com/llvm/llvm-project/pull/100623
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)

2024-07-25 Thread Damyan Pepper via llvm-branch-commits

https://github.com/damyanp approved this pull request.


https://github.com/llvm/llvm-project/pull/100623
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/100634

Backport cc4f98979b079b517edd8a71f56a8975f436e63d

Requested by: @asl

>From 018a8c72cc75b9bb4dcb88a07bdda31454c78ca1 Mon Sep 17 00:00:00 2001
From: Anton Korobeynikov 
Date: Thu, 25 Jul 2024 11:57:46 -0700
Subject: [PATCH] Normalize ptrauth handling in sanitizer runtime (#100483)

1. Include `ptrauth.h` if `ptrauth_intrinsics` language feature is specified 
(per ptrauth spec, this is what enables `ptrauth.h` usage and functions like 
`ptrauth_strip`)
 2. For PAC-RET fallback implement two changes:
1. Switch to macro, so we can ignore key argument
2. Ensure the unsigned value is erased from LR, so the possibility of 
gadget reuse is reduced.

Fixes #100467

(cherry picked from commit cc4f98979b079b517edd8a71f56a8975f436e63d)
---
 .../lib/sanitizer_common/sanitizer_ptrauth.h  | 46 ++-
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h 
b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
index 5200354694851..b5215c0d49c06 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
@@ -9,31 +9,33 @@
 #ifndef SANITIZER_PTRAUTH_H
 #define SANITIZER_PTRAUTH_H
 
-#if __has_feature(ptrauth_calls)
-#include <ptrauth.h>
+#if __has_feature(ptrauth_intrinsics)
+#  include <ptrauth.h>
 #elif defined(__ARM_FEATURE_PAC_DEFAULT) && !defined(__APPLE__)
-inline unsigned long ptrauth_strip(void* __value, unsigned int __key) {
-  // On the stack the link register is protected with Pointer
-  // Authentication Code when compiled with -mbranch-protection.
-  // Let's stripping the PAC unconditionally because xpaclri is in
-  // the NOP space so will do nothing when it is not enabled or not available.
-  unsigned long ret;
-  asm volatile(
-  "mov x30, %1\n\t"
-  "hint #7\n\t"  // xpaclri
-  "mov %0, x30\n\t"
-  : "=r"(ret)
-  : "r"(__value)
-  : "x30");
-  return ret;
-}
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+// On the stack the link register is protected with Pointer
+// Authentication Code when compiled with -mbranch-protection.
+// Let's stripping the PAC unconditionally because xpaclri is in
+// the NOP space so will do nothing when it is not enabled or not available.
+#  define ptrauth_strip(__value, __key) \
+({  \
+  unsigned long ret;\
+  asm volatile( \
+  "mov x30, %1\n\t" \
+  "hint #7\n\t" \
+  "mov %0, x30\n\t" \
+  "mov x30, xzr\n\t"\
+  : "=r"(ret)   \
+  : "r"(__value)\
+  : "x30"); \
+  ret;  \
+})
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #else
 // Copied from <ptrauth.h>
-#define ptrauth_strip(__value, __key) __value
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+#  define ptrauth_strip(__value, __key) __value
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #endif
 
 #define STRIP_PAC_PC(pc) ((uptr)ptrauth_strip(pc, 0))

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)

2024-07-25 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/100634
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:

@DanielKristofKiss What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/100634
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)

2024-07-25 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (llvmbot)


Changes

Backport cc4f98979b079b517edd8a71f56a8975f436e63d

Requested by: @asl

---
Full diff: https://github.com/llvm/llvm-project/pull/100634.diff


1 Files Affected:

- (modified) compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h (+24-22) 


``diff
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h 
b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
index 5200354694851..b5215c0d49c06 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
@@ -9,31 +9,33 @@
 #ifndef SANITIZER_PTRAUTH_H
 #define SANITIZER_PTRAUTH_H
 
-#if __has_feature(ptrauth_calls)
-#include <ptrauth.h>
+#if __has_feature(ptrauth_intrinsics)
+#  include <ptrauth.h>
 #elif defined(__ARM_FEATURE_PAC_DEFAULT) && !defined(__APPLE__)
-inline unsigned long ptrauth_strip(void* __value, unsigned int __key) {
-  // On the stack the link register is protected with Pointer
-  // Authentication Code when compiled with -mbranch-protection.
-  // Let's stripping the PAC unconditionally because xpaclri is in
-  // the NOP space so will do nothing when it is not enabled or not available.
-  unsigned long ret;
-  asm volatile(
-  "mov x30, %1\n\t"
-  "hint #7\n\t"  // xpaclri
-  "mov %0, x30\n\t"
-  : "=r"(ret)
-  : "r"(__value)
-  : "x30");
-  return ret;
-}
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+// On the stack the link register is protected with Pointer
+// Authentication Code when compiled with -mbranch-protection.
+// Let's stripping the PAC unconditionally because xpaclri is in
+// the NOP space so will do nothing when it is not enabled or not available.
+#  define ptrauth_strip(__value, __key) \
+({  \
+  unsigned long ret;\
+  asm volatile( \
+  "mov x30, %1\n\t" \
+  "hint #7\n\t" \
+  "mov %0, x30\n\t" \
+  "mov x30, xzr\n\t"\
+  : "=r"(ret)   \
+  : "r"(__value)\
+  : "x30"); \
+  ret;  \
+})
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #else
 // Copied from <ptrauth.h>
-#define ptrauth_strip(__value, __key) __value
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+#  define ptrauth_strip(__value, __key) __value
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #endif
 
 #define STRIP_PAC_PC(pc) ((uptr)ptrauth_strip(pc, 0))

``




https://github.com/llvm/llvm-project/pull/100634
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)

2024-07-25 Thread Yuxuan Chen via llvm-branch-commits

https://github.com/yuxuanchen1997 updated 
https://github.com/llvm/llvm-project/pull/99283

>From d42ce99fac00de6d35e423490f2603796a10 Mon Sep 17 00:00:00 2001
From: Yuxuan Chen 
Date: Mon, 15 Jul 2024 15:01:39 -0700
Subject: [PATCH] Implement noalloc in CoroSplit

---
 llvm/lib/Transforms/Coroutines/CoroInternal.h |   4 +
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  | 123 ++
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |  27 
 llvm/test/Transforms/Coroutines/ArgAddr.ll|   2 +-
 .../Transforms/Coroutines/coro-alloca-07.ll   |   2 +-
 .../coro-alloca-loop-carried-address.ll   |   2 +-
 .../Coroutines/coro-lifetime-end.ll   |   6 +-
 .../Coroutines/coro-spill-after-phi.ll|   2 +-
 .../Transforms/Coroutines/coro-split-00.ll|   7 +
 9 files changed, 142 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h 
b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 5716fd0ea4ab9..d91cccd99a703 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M,
 const std::initializer_list);
 void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
 
+void suppressCoroAllocs(CoroIdInst *CoroId);
+void suppressCoroAllocs(LLVMContext &Context,
+ArrayRef CoroAllocs);
+
 /// Attempts to rewrite the location operand of debug intrinsics in terms of
 /// the coroutine frame pointer, folding pointer offsets into the DIExpression
 /// of the intrinsic.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp 
b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 9e4da5f8ca961..9c0db4f29056e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
@@ -1179,6 +1180,14 @@ static void 
updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
   Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
 }
 
+static TypeSize getFrameSizeForShape(coro::Shape &Shape) {
+  // In the same function all coro.sizes should have the same result type.
+  auto *SizeIntrin = Shape.CoroSizes.back();
+  Module *M = SizeIntrin->getModule();
+  const DataLayout &DL = M->getDataLayout();
+  return DL.getTypeAllocSize(Shape.FrameTy);
+}
+
 static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
   if (Shape.ABI == coro::ABI::Async)
 updateAsyncFuncPointerContextSize(Shape);
@@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape 
&Shape) {
 
   // In the same function all coro.sizes should have the same result type.
   auto *SizeIntrin = Shape.CoroSizes.back();
-  Module *M = SizeIntrin->getModule();
-  const DataLayout &DL = M->getDataLayout();
-  auto Size = DL.getTypeAllocSize(Shape.FrameTy);
-  auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+  auto *SizeConstant =
+  ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape));
 
   for (CoroSizeInst *CS : Shape.CoroSizes) {
 CS->replaceAllUsesWith(SizeConstant);
@@ -1455,6 +1462,64 @@ struct SwitchCoroutineSplitter {
 setCoroInfo(F, Shape, Clones);
   }
 
+  static Function *createNoAllocVariant(Function &F, coro::Shape &Shape,
+SmallVectorImpl &Clones) {
+auto *OrigFnTy = F.getFunctionType();
+auto OldParams = OrigFnTy->params();
+
+SmallVector NewParams;
+NewParams.reserve(OldParams.size() + 1);
+for (Type *T : OldParams) {
+  NewParams.push_back(T);
+}
+NewParams.push_back(PointerType::getUnqual(Shape.FrameTy));
+
+auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams,
+  OrigFnTy->isVarArg());
+Function *NoAllocF =
+Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc");
+ValueToValueMapTy VMap;
+unsigned int Idx = 0;
+for (const auto &I : F.args()) {
+  VMap[&I] = NoAllocF->getArg(Idx++);
+}
+SmallVector Returns;
+CloneFunctionInto(NoAllocF, &F, VMap,
+  CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+if (Shape.CoroBegin) {
+  auto *NewCoroBegin =
+  cast_if_present(VMap[Shape.CoroBegin]);
+  auto *NewCoroId = cast(NewCoroBegin->getId());
+  coro::replaceCoroFree(NewCoroId, /*Elide=*/true);
+  coro::suppressCoroAllocs(NewCoroId);
+  NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx));
+  NewCoroBegin->eraseFromParent();
+}
+
+Module *M = F.getParent();
+M->getFunctionList().insert(M->end(), NoAllocF);
+
+removeUnreachableBlocks(*NoAllocF);
+auto NewAttrs = NoAllocF->getAttributes();
+// We just 

[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jul 25, 4:25 PM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100522).


https://github.com/llvm/llvm-project/pull/100522
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100519

>From 3d683da35b98db6dd0b5a94692b735765a6f776f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:27:54 +0400
Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  67 +-
 .../Analysis/CostModel/X86/arith-overflow.ll  | 120 +-
 2 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index a89d4fe467eb9..314390aee5085 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBO;
   break;
 case Intrinsic::smul_with_overflow:
-case Intrinsic::umul_with_overflow: {
-  Type *MulTy = RetTy->getContainedType(0);
-  Type *OverflowTy = RetTy->getContainedType(1);
-  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
-  bool IsSigned = IID == Intrinsic::smul_with_overflow;
-
-  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  if (IsSigned)
-Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
-CostKind,
-{TTI::OK_AnyValue, 
TTI::OP_None},
-{TTI::OK_UniformConstantValue, 
TTI::OP_None});
-
-  Cost += thisT()->getCmpSelInstrCost(
-  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULO;
+  break;
+case Intrinsic::umul_with_overflow:
+  ISD = ISD::UMULO;
+  break;
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
@@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   OverflowTy, Pred, CostKind);
   return Cost;
 }
+case Intrinsic::smul_with_overflow:
+case Intrinsic::umul_with_overflow: {
+  Type *MulTy = RetTy->getContainedType(0);
+  Type *OverflowTy = RetTy->getContainedType(1);
+  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+  bool IsSigned = IID == Intrinsic::smul_with_overflow;
+
+  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  if (IsSigned)
+Cost += thisT()->getArithmeticInstrCost(
+Instruction::AShr, MulTy, CostKind,
+{TTI::OK_AnyValue, TTI::OP_None},
+{TTI::OK_UniformConstantValue, TTI::OP_None});
+
+  Cost += thisT()->getCmpSelInstrCost(
+  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
+  return Cost;
+}
 case Intrinsic::sadd_sat:
 case Intrinsic::ssub_sat: {
   // Assume a default expansion.
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll 
b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index c5da46af04367..28d53042d4c21 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) {
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: 
%V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x 
i16> undef, <16 x i16> undef)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 148 f

[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100520

>From 1d17da3e7cd5253d0c7a9bb8acc5989d1e5ba615 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:31:04 +0400
Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes

---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 314390aee5085..1a089a3fa9634 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   ISD = ISD::USUBSAT;
   break;
 case Intrinsic::smul_fix:
-case Intrinsic::umul_fix: {
-  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
-  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
-
-  unsigned ExtOp =
-  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
-  TTI::CastContextHint CCH = TTI::CastContextHint::None;
-
-  InstructionCost Cost = 0;
-  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
-  Cost +=
-  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
-  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
-CCH, CostKind);
-  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
-  CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, 
CostKind,
-  {TTI::OK_AnyValue, TTI::OP_None},
-  {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
-  return Cost;
-}
+  ISD = ISD::SMULFIX;
+  break;
+case Intrinsic::umul_fix:
+  ISD = ISD::UMULFIX;
+  break;
 case Intrinsic::sadd_with_overflow:
   ISD = ISD::SADDO;
   break;
@@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   CmpInst::BAD_ICMP_PREDICATE, CostKind);
   return Cost;
 }
+case Intrinsic::smul_fix:
+case Intrinsic::umul_fix: {
+  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
+
+  unsigned ExtOp =
+  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+  TTI::CastContextHint CCH = TTI::CastContextHint::None;
+
+  InstructionCost Cost = 0;
+  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, 
CostKind);
+  Cost +=
+  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+CCH, CostKind);
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(
+  Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
+  return Cost;
+}
 default:
   break;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)

2024-07-25 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/100523

>From 49db2b2b9855d18df6449b6dedf7e50ccc1d6265 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 25 Jul 2024 10:38:11 +0400
Subject: [PATCH] TTI: Check legalization cost of abs nodes

Also adjust the AMDGPU cost.
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h  |  32 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |   9 +-
 llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +-
 3 files changed, 210 insertions(+), 199 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ba70498bfb731..65f929369c1f0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fminimum:
   return 
thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
  VecOpTy, ICA.getFlags(), 
CostKind);
-case Intrinsic::abs: {
-  // abs(X) = select(icmp(X,0),X,sub(0,X))
-  Type *CondTy = RetTy->getWithNewBitWidth(1);
-  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
-  InstructionCost Cost = 0;
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
-  Pred, CostKind);
-  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
-  Pred, CostKind);
-  // TODO: Should we add an OperandValueProperties::OP_Zero property?
-  Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, 
TTI::OP_None});
-  return Cost;
-}
+case Intrinsic::abs:
+  ISD = ISD::ABS;
+  break;
 case Intrinsic::smax:
   ISD = ISD::SMAX;
   break;
@@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, 
CostKind);
   return Cost;
 }
+case Intrinsic::abs: {
+  // abs(X) = select(icmp(X,0),X,sub(0,X))
+  Type *CondTy = RetTy->getWithNewBitWidth(1);
+  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+  InstructionCost Cost = 0;
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+  Pred, CostKind);
+  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, 
CondTy,
+  Pred, CostKind);
+  // TODO: Should we add an OperandValueProperties::OP_Zero property?
+  Cost += thisT()->getArithmeticInstrCost(
+  BinaryOperator::Sub, RetTy, CostKind,
+  {TTI::OK_UniformConstantValue, TTI::OP_None});
+  return Cost;
+}
 case Intrinsic::fptosi_sat:
 case Intrinsic::fptoui_sat: {
   if (Tys.empty())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0b1ecc002ae25..8ae236850b982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+  case Intrinsic::abs:
 return true;
   default:
 return false;
@@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && SLT == MVT::f16) ||
+  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
@@ -737,10 +738,16 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   case Intrinsic::usub_sat:
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
+// TODO: Full rate for i32/i16
 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
   NElts = 1;
 break;
+  case Intrinsic::abs:
+// Expansion takes 2 instructions for VALU
+if (SLT == MVT::i16 || SLT == MVT::i32)
+  InstRate = 2 * getFullRateInstrCost();
+break;
   }
 
   return LT.first * NElts * InstRate;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
index f65615b07abc0..b86e99558377b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -14,116 +14,116 @@ define void @abs_nonpoison() {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = 
call i64 @llvm.abs.i64(i64 undef, i1 false)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instructi

[llvm-branch-commits] [MC][NFC] Store MCPseudoProbeFuncDesc::FuncName as StringRef (PR #100655)

2024-07-25 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov created 
https://github.com/llvm/llvm-project/pull/100655

Reduces peak RSS in `perf2bolt --profile-use-pseudo-probes` to 16.04GiB.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)

2024-07-25 Thread Shaw Young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/99891

>From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 22 Jul 2024 11:56:23 -0700
Subject: [PATCH 01/24] Changed assignment of profiles with pseudo probe index

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 85 +++
 .../X86/match-blocks-with-pseudo-probes.test  | 25 ++
 2 files changed, 78 insertions(+), 32 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp 
b/bolt/lib/Profile/StaleProfileMatching.cpp
index 4105f626fb5b6..c135ee5ff4837 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -195,11 +195,15 @@ class StaleMatcher {
   void init(const std::vector &Blocks,
 const std::vector &Hashes,
 const std::vector &CallHashes,
-std::optional YamlBFGUID) {
+const std::unordered_map>
+IndexToBinaryPseudoProbes,
+const std::unordered_map
+BinaryPseudoProbeToBlock,
+const uint64_t YamlBFGUID) {
 assert(Blocks.size() == Hashes.size() &&
Hashes.size() == CallHashes.size() &&
"incorrect matcher initialization");
-
 for (size_t I = 0; I < Blocks.size(); I++) {
   FlowBlock *Block = Blocks[I];
   uint16_t OpHash = Hashes[I].OpcodeHash;
@@ -209,6 +213,8 @@ class StaleMatcher {
 std::make_pair(Hashes[I], Block));
   this->Blocks.push_back(Block);
 }
+this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes;
+this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock;
 this->YamlBFGUID = YamlBFGUID;
   }
 
@@ -234,10 +240,14 @@ class StaleMatcher {
   using HashBlockPairType = std::pair;
   std::unordered_map> OpHashToBlocks;
   std::unordered_map> CallHashToBlocks;
-  std::vector Blocks;
+  std::unordered_map>
+  IndexToBinaryPseudoProbes;
+  std::unordered_map
+  BinaryPseudoProbeToBlock;
+  std::vector Blocks;
   // If the pseudo probe checksums of the profiled and binary functions are
   // equal, then the YamlBF's GUID is defined and used to match blocks.
-  std::optional YamlBFGUID;
+  uint64_t YamlBFGUID;
 
   // Uses OpcodeHash to find the most similar block for a given hash.
   const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const {
@@ -284,7 +294,7 @@ class StaleMatcher {
 // Searches for the pseudo probe attached to the matched function's block,
 // ignoring pseudo probes attached to function calls and inlined functions'
 // blocks.
-outs() << "match with pseudo probes\n";
+std::vector BlockPseudoProbes;
 for (const auto &PseudoProbe : PseudoProbes) {
   // Ensures that pseudo probe information belongs to the appropriate
   // function and not an inlined function.
@@ -293,11 +303,30 @@ class StaleMatcher {
   // Skips pseudo probes attached to function calls.
   if (PseudoProbe.Type != static_cast(PseudoProbeType::Block))
 continue;
-  assert(PseudoProbe.Index < Blocks.size() &&
- "pseudo probe index out of range");
-  return Blocks[PseudoProbe.Index];
+
+  BlockPseudoProbes.push_back(&PseudoProbe);
 }
-return nullptr;
+
+// Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo
+// probe and binary pseudo probe.
+if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1)
+  return nullptr;
+
+uint64_t Index = BlockPseudoProbes[0]->Index;
+assert(Index < Blocks.size() && "Invalid pseudo probe index");
+
+auto It = IndexToBinaryPseudoProbes.find(Index);
+assert(It != IndexToBinaryPseudoProbes.end() &&
+   "All blocks should have a pseudo probe");
+if (It->second.size() > 1)
+  return nullptr;
+
+const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0];
+auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe);
+assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() &&
+   "All binary pseudo probes should belong a binary basic block");
+
+return BinaryPseudoProbeIt->second;
   }
 };
 
@@ -491,6 +520,11 @@ size_t matchWeightsByHashes(
   std::vector CallHashes;
   std::vector Blocks;
   std::vector BlendedHashes;
+  std::unordered_map>
+  IndexToBinaryPseudoProbes;
+  std::unordered_map
+  BinaryPseudoProbeToBlock;
+  const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
 const BinaryBasicBlock *BB = BlockOrder[I];
 assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock");
@@ -510,9 +544,27 @@ size_t matchWeightsByHashes(
 Blocks.push_back(&Func.Blocks[I + 1]);
 BlendedBlockHash BlendedHash(BB->getHash());
 BlendedHashes.push_back(BlendedHash);
+if (PseudoProbeDecoder) {
+  const AddressProbesMap &ProbeMap =
+  PseudoProbeDecoder->get

  1   2   >