[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100513 >From 80b236530103a66b8939aeb26f1d5c2be9043b5c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 2 Jul 2024 21:28:30 +0200 Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max --- .../SLPVectorizer/AMDGPU/min_max.ll | 366 ++ 1 file changed, 366 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll new file mode 100644 index 0..47b0dbd6b2cff --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll @@ -0,0 +1,366 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s + +define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @uadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @uadd_sat_v2i16( +; 
GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @uadd_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @usub_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @usub_sat_v2i16( +; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @usub_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 
x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @sadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extract
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100519 None >From c98dcbf907a6b5d085b89f06d49ee8a3bc3e9dd2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 +- .../Analysis/CostModel/X86/arith-overflow.ll | 120 +- .../CostModel/X86/intrinsic-cost-kinds.ll | 6 +- 3 files changed, 99 insertions(+), 94 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index ba745262d1890..2d907d87b057c 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100520 None >From 689ea8720d60ae6fc1226b929f5333adae1ce77c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.
Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100519 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>.
Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100522 None >From 330c0e2bf40cf96b1c7778636fa739cb0c1a1f11 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:47:03 +0400 Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics --- llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 341 + 1 file changed, 341 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..133b95609bc15 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,341 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END. 
+ +declare i64@llvm.abs.i64(i64, i1 immarg) +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1 immarg) +declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1 immarg) +declare <5 x i64> @llvm.abs.v5i64(<5 x i64>, i1 immarg) +declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1 immarg) + +declare i32@llvm.abs.i32(i32, i1 immarg) +declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1 immarg) +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) +declare <9 x i32> @llvm.abs.v9i32(<9 x i32>, i1 immarg) +declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg) + +declare i16@llvm.abs.i16(i16, i1 immarg) +declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1 immarg) +declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1 immarg) +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1 immarg) +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) +declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg) +declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg) + +declare i8 @llvm.abs.i8(i8, i1 immarg) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1 immarg) +declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1 immarg) +declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1 immarg) +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1 immarg) +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) +declare <33 x i8> @llvm.abs.v33i8(<33 x i8>, i1 immarg) +declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1 immarg) + +define i32 @abs_nonpoison(i32 %arg) { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x 
i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite.
<a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100520 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite.
<a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100521 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100523 None >From ca78bfb62816c21172101c1f00dcead3efc472dc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 + llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++--- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index 133b95609bc15..623e02eb8239d 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for i
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>. > <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a> * **#100522** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 * **#100521** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100520** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100519** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100518** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100514** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#100513** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * **#97463**
<a href="https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> * `main` This stack of pull requests is managed by Graphite. <a href="https://stacking.dev/?utm_source=stack-comment">Learn more about stacking</a>. Join @arsenm and the rest of your teammates on <a href="https://graphite.dev?utm-source=stack-comment"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="11px" height="11px"/> Graphite</a> https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#100523** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100523?utm_source=stack-comment-icon) 👈 * **#100522** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100522?utm_source=stack-comment-icon) * **#100521** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100521?utm_source=stack-comment-icon) * **#100520** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100520?utm_source=stack-comment-icon) * **#100519** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100519?utm_source=stack-comment-icon) * **#100518** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100518?utm_source=stack-comment-icon) * **#100514** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100514?utm_source=stack-comment-icon) * **#100513**
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100513?utm_source=stack-comment-icon) * **#97463** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/97463?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite. [Learn more about stacking.](https://stacking.dev/?utm_source=stack-comment) Join @arsenm and the rest of your teammates on [Graphite](https://graphite.dev?utm-source=stack-comment) https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)
https://github.com/arsenm marked this pull request as ready for review: https://github.com/llvm/llvm-project/pull/100518 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm marked this pull request as ready for review: https://github.com/llvm/llvm-project/pull/100519 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm marked this pull request as ready for review: https://github.com/llvm/llvm-project/pull/100520 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) Changes --- Patch is 40.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100519.diff 3 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+36-31) - (modified) llvm/test/Analysis/CostModel/X86/arith-overflow.ll (+60-60) - (modified) llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll (+3-3) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index ba745262d1890..2d907d87b057c 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 for in
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/100520.diff 1 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+29-24) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = 
RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } `` https://github.com/llvm/llvm-project/pull/100520 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
https://github.com/arsenm marked this pull request as ready for review: https://github.com/llvm/llvm-project/pull/100521 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm marked this pull request as ready for review: https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of fptosi_sat/fptoui_sat nodes (PR #100521)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) Changes --- Patch is 127.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100521.diff 6 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+31-25) - (modified) llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll (+76-76) - (modified) llvm/test/Analysis/CostModel/ARM/fptoi_sat.ll (+58-58) - (modified) llvm/test/Analysis/CostModel/X86/fptoi_sat.ll (+200-200) - (modified) llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll (+35-44) - (modified) llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll (+35-14) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 1a089a3fa9634..ba70498bfb731 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2179,31 +2179,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::UMULO; break; case Intrinsic::fptosi_sat: -case Intrinsic::fptoui_sat: { - if (Tys.empty()) -break; - Type *FromTy = Tys[0]; - bool IsSigned = IID == Intrinsic::fptosi_sat; - - InstructionCost Cost = 0; - IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, - {FromTy, FromTy}); - Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); - IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, - {FromTy, FromTy}); - Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); - Cost += thisT()->getCastInstrCost( - IsSigned ? 
Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, - TTI::CastContextHint::None, CostKind); - if (IsSigned) { -Type *CondTy = RetTy->getWithNewBitWidth(1); -Cost += thisT()->getCmpSelInstrCost( -BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); -Cost += thisT()->getCmpSelInstrCost( -BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); - } - return Cost; -} + ISD = ISD::FP_TO_SINT_SAT; + break; +case Intrinsic::fptoui_sat: + ISD = ISD::FP_TO_UINT_SAT; + break; case Intrinsic::ctpop: ISD = ISD::CTPOP; // In case of legalization use TCC_Expensive. This is cheaper than a @@ -2418,6 +2398,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::fptosi_sat: +case Intrinsic::fptoui_sat: { + if (Tys.empty()) +break; + Type *FromTy = Tys[0]; + bool IsSigned = IID == Intrinsic::fptosi_sat; + + InstructionCost Cost = 0; + IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy, + {FromTy, FromTy}); + Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind); + Cost += thisT()->getCastInstrCost( + IsSigned ? 
Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy, + TTI::CastContextHint::None, CostKind); + if (IsSigned) { +Type *CondTy = RetTy->getWithNewBitWidth(1); +Cost += thisT()->getCmpSelInstrCost( +BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind); +Cost += thisT()->getCmpSelInstrCost( +BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind); + } + return Cost; +} default: break; } diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll index e4e29143985b2..6fbcf2a14da7d 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll @@ -34,8 +34,8 @@ define void @casts() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated co
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm marked this pull request as ready for review: https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 245.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100518.diff 8 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+52-38) - (modified) llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll (+12-12) - (modified) llvm/test/Analysis/CostModel/ARM/arith-overflow.ll (+152-152) - (modified) llvm/test/Analysis/CostModel/ARM/arith-ssat.ll (+86-86) - (modified) llvm/test/Analysis/CostModel/ARM/arith-usat.ll (+84-84) - (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll (+44-44) - (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll (+44-44) - (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll (+44-44) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c842e4a2c4320..a89d4fe467eb9 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2180,44 +2180,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } case Intrinsic::sadd_with_overflow: -case Intrinsic::ssub_with_overflow: { - Type *SumTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned Opcode = IID == Intrinsic::sadd_with_overflow -? 
BinaryOperator::Add -: BinaryOperator::Sub; - - // Add: - // Overflow -> (Result < LHS) ^ (RHS < 0) - // Sub: - // Overflow -> (Result < LHS) ^ (RHS > 0) - InstructionCost Cost = 0; - Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost( - Instruction::ICmp, SumTy, OverflowTy, - CmpInst::ICMP_SGT, CostKind); - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy, - CostKind); - return Cost; -} + ISD = ISD::SADDO; + break; +case Intrinsic::ssub_with_overflow: + ISD = ISD::SSUBO; + break; case Intrinsic::uadd_with_overflow: -case Intrinsic::usub_with_overflow: { - Type *SumTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned Opcode = IID == Intrinsic::uadd_with_overflow -? BinaryOperator::Add -: BinaryOperator::Sub; - CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow -? CmpInst::ICMP_ULT -: CmpInst::ICMP_UGT; - - InstructionCost Cost = 0; - Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, - Pred, CostKind); - return Cost; -} + ISD = ISD::UADDO; + break; +case Intrinsic::usub_with_overflow: + ISD = ISD::USUBO; + break; case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: { Type *MulTy = RetTy->getContainedType(0); @@ -2296,8 +2269,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { break; } +auto *ST = dyn_cast(RetTy); +Type *LegalizeTy = ST ? 
ST->getContainedType(0) : RetTy; +std::pair LT = getTypeLegalizationCost(LegalizeTy); + const TargetLoweringBase *TLI = getTLI(); -std::pair LT = getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && @@ -2353,6 +2329,44 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Pred, CostKind); return Cost; } +case Intrinsic::sadd_with_overflow: +case Intrinsic::ssub_with_overflow: { + Type *SumTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned Opcode = IID == Intrinsic::sadd_with_overflow +? BinaryOperator::Add +: BinaryOperator::Sub; + + // Add: + // Overflow -> (Result < LHS) ^ (RHS < 0) + // Sub: + // Overflow -> (Result < LHS) ^ (RHS > 0) + InstructionCost Cost = 0; + Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); + Cost += + 2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy, + CmpInst::ICMP_SGT, CostKind); + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy, + CostKind); + return Cost; +} +case Intrinsic::uadd_with_overflow: +case Intrinsic::usub_
[llvm-branch-commits] [llvm] TTI: Check legalization cost of add/sub overflow ISD nodes (PR #100518)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) Changes --- Patch is 245.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100518.diff 8 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+52-38) - (modified) llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll (+12-12) - (modified) llvm/test/Analysis/CostModel/ARM/arith-overflow.ll (+152-152) - (modified) llvm/test/Analysis/CostModel/ARM/arith-ssat.ll (+86-86) - (modified) llvm/test/Analysis/CostModel/ARM/arith-usat.ll (+84-84) - (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-codesize.ll (+44-44) - (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-latency.ll (+44-44) - (modified) llvm/test/Analysis/CostModel/X86/arith-ssat-sizelatency.ll (+44-44) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index c842e4a2c4320..a89d4fe467eb9 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2180,44 +2180,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } case Intrinsic::sadd_with_overflow: -case Intrinsic::ssub_with_overflow: { - Type *SumTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned Opcode = IID == Intrinsic::sadd_with_overflow -? 
BinaryOperator::Add -: BinaryOperator::Sub; - - // Add: - // Overflow -> (Result < LHS) ^ (RHS < 0) - // Sub: - // Overflow -> (Result < LHS) ^ (RHS > 0) - InstructionCost Cost = 0; - Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost( - Instruction::ICmp, SumTy, OverflowTy, - CmpInst::ICMP_SGT, CostKind); - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy, - CostKind); - return Cost; -} + ISD = ISD::SADDO; + break; +case Intrinsic::ssub_with_overflow: + ISD = ISD::SSUBO; + break; case Intrinsic::uadd_with_overflow: -case Intrinsic::usub_with_overflow: { - Type *SumTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned Opcode = IID == Intrinsic::uadd_with_overflow -? BinaryOperator::Add -: BinaryOperator::Sub; - CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow -? CmpInst::ICMP_ULT -: CmpInst::ICMP_UGT; - - InstructionCost Cost = 0; - Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, - Pred, CostKind); - return Cost; -} + ISD = ISD::UADDO; + break; +case Intrinsic::usub_with_overflow: + ISD = ISD::USUBO; + break; case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: { Type *MulTy = RetTy->getContainedType(0); @@ -2296,8 +2269,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { break; } +auto *ST = dyn_cast(RetTy); +Type *LegalizeTy = ST ? 
ST->getContainedType(0) : RetTy; +std::pair LT = getTypeLegalizationCost(LegalizeTy); + const TargetLoweringBase *TLI = getTLI(); -std::pair LT = getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && @@ -2353,6 +2329,44 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Pred, CostKind); return Cost; } +case Intrinsic::sadd_with_overflow: +case Intrinsic::ssub_with_overflow: { + Type *SumTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned Opcode = IID == Intrinsic::sadd_with_overflow +? BinaryOperator::Add +: BinaryOperator::Sub; + + // Add: + // Overflow -> (Result < LHS) ^ (RHS < 0) + // Sub: + // Overflow -> (Result < LHS) ^ (RHS > 0) + InstructionCost Cost = 0; + Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); + Cost += + 2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy, + CmpInst::ICMP_SGT, CostKind); + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy, + CostKind); + return Cost; +} +case Intrinsic::uadd_with_overflow: +case Intrinsic::usub_w
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Patch is 36.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100522.diff 1 Files Affected: - (added) llvm/test/Analysis/CostModel/AMDGPU/abs.ll (+341) ``diff diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..133b95609bc15 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,341 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END. 
+ +declare i64@llvm.abs.i64(i64, i1 immarg) +declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1 immarg) +declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1 immarg) +declare <5 x i64> @llvm.abs.v5i64(<5 x i64>, i1 immarg) +declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1 immarg) + +declare i32@llvm.abs.i32(i32, i1 immarg) +declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1 immarg) +declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg) +declare <9 x i32> @llvm.abs.v9i32(<9 x i32>, i1 immarg) +declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg) + +declare i16@llvm.abs.i16(i16, i1 immarg) +declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1 immarg) +declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1 immarg) +declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1 immarg) +declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg) +declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg) +declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg) + +declare i8 @llvm.abs.i8(i8, i1 immarg) +declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1 immarg) +declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1 immarg) +declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1 immarg) +declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1 immarg) +declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg) +declare <33 x i8> @llvm.abs.v33i8(<33 x i8>, i1 immarg) +declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1 immarg) + +define i32 @abs_nonpoison(i32 %arg) { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x 
i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/100523.diff 2 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+18-14) - (modified) llvm/test/Analysis/CostModel/AMDGPU/abs.ll (+20-20) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index 133b95609bc15..623e02eb8239d 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 f
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/100523.diff 2 Files Affected: - (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+18-14) - (modified) llvm/test/Analysis/CostModel/AMDGPU/abs.ll (+20-20) ``diff diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index 133b95609bc15..623e02eb8239d 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 fo
[llvm-branch-commits] [llvm] AMDGPU: Handle new atomicrmw metadata for fadd case (PR #96760)
arsenm wrote: ping https://github.com/llvm/llvm-project/pull/96760 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/100546 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/100546 Backport 6da6772bf0a33131aa8540c9d4f60d5db75c32b5 Requested by: @kmclaughlin-arm >From 6271d3d03846216502487711f330227a4a6d3022 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Wed, 24 Jul 2024 14:30:25 +0100 Subject: [PATCH] [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) The documentation for the __arm_get_current_vg support routine specifies that the following registers are call-preserved: - X1-X15, X19-X29 and SP - Z0-Z31 - P0-P15 This patch rewrites the implementation of this routine in compiler-rt, as the current version does not guarantee that these registers will be preserved. (cherry picked from commit 6da6772bf0a33131aa8540c9d4f60d5db75c32b5) --- compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 28 compiler-rt/lib/builtins/aarch64/sme-abi.S| 44 +++ 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c index 062cf80fc6848..20061012e16c6 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c @@ -10,15 +10,6 @@ struct FEATURES { extern struct FEATURES __aarch64_cpu_features; -struct SME_STATE { - long PSTATE; - long TPIDR2_EL0; -}; - -extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible; - -extern bool __aarch64_has_sme_and_tpidr2_el0; - #if __GNUC__ >= 9 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor" #endif @@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) { __init_cpu_features(); } - -__attribute__((target("sve"))) long -__arm_get_current_vg(void) __arm_streaming_compatible { - struct SME_STATE State = __arm_sme_state(); - unsigned long long features = - __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED); - bool HasSVE = features & (1ULL << FEAT_SVE); - - if (!HasSVE && 
!__aarch64_has_sme_and_tpidr2_el0) -return 0; - - if (HasSVE || (State.PSTATE & 1)) { -long vl; -__asm__ __volatile__("cntd %0" : "=r"(vl)); -return vl; - } - - return 0; -} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 4c0ff66931db7..cd8153f60670f 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -12,11 +12,15 @@ #if !defined(__APPLE__) #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) +#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) +#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) #else // MachO requires @page/@pageoff directives because the global is defined // in a different file. Otherwise this file may fail to build. #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff +#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page +#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff #endif .arch armv9-a+sme @@ -180,6 +184,46 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) ret END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable) +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg) + .variant_pcs __arm_get_current_vg + BTI_C + + stp x29, x30, [sp, #-16]! 
+ .cfi_def_cfa_offset 16 + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + adrpx17, CPU_FEATS_SYMBOL + ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET] + tbnzw17, #30, 0f + adrpx16, TPIDR2_SYMBOL + ldrbw16, [x16, TPIDR2_SYMBOL_OFFSET] + cbz w16, 1f +0: + mov x18, x1 + bl __arm_sme_state + mov x1, x18 + and x17, x17, #0x4000 + bfxil x17, x0, #0, #1 + cbz x17, 1f + cntdx0 + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +1: + mov x0, xzr + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg) + NO_EXEC_STACK_DIRECTIVE // GNU property note for BTI and PAC ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)
llvmbot wrote: @sdesmalen-arm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/100546 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) (PR #100546)
sdesmalen-arm wrote: It would be great if we could merge this fix into the release branch! https://github.com/llvm/llvm-project/pull/100546 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [flang] release/19.x: [Flang][Driver] Enable config file options (#100343) (PR #100541)
https://github.com/pawosm-arm approved this pull request. https://github.com/llvm/llvm-project/pull/100541 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)
@@ -42,75 +42,50 @@ define i32 @umax(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> 
@llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 
= call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> un
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
https://github.com/jayfoad edited https://github.com/llvm/llvm-project/pull/100513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
https://github.com/jayfoad approved this pull request. LGTM. https://github.com/llvm/llvm-project/pull/100513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
@@ -0,0 +1,366 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s + +define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @uadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @uadd_sat_v2i16( +; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @uadd_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 
%arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @usub_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @usub_sat_v2i16( +; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[TMP0]] +; +; GFX9-LABEL: @usub_sat_v2i16( +; GFX9-NEXT: bb: +; GFX9-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX9-NEXT:ret <2 x i16> [[TMP0]] +; +bb: + %arg0.0 = extractelement <2 x i16> %arg0, i64 0 + %arg0.1 = extractelement <2 x i16> %arg0, i64 1 + %arg1.0 = extractelement <2 x i16> %arg1, i64 0 + %arg1.1 = extractelement <2 x i16> %arg1, i64 1 + %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0) + %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1) + %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0 + %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1 + ret <2 x i16> %ins.1 +} + +define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { +; GFX7-LABEL: @sadd_sat_v2i16( +; GFX7-NEXT: bb: +; GFX7-NEXT:[[ARG0_0:%.*]] = extractelement <2 x i16> 
[[ARG0:%.*]], i64 0 +; GFX7-NEXT:[[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX7-NEXT:[[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX7-NEXT:[[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX7-NEXT:[[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX7-NEXT:[[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX7-NEXT:[[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX7-NEXT:[[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX7-NEXT:ret <2 x i16> [[INS_1]] +; +; GFX8-LABEL: @sadd_sat_v2i16( +; GFX8-NEXT: bb: +; GFX8-NEXT:[[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) +; GFX8-NEXT:ret <2 x i16> [[T
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
@@ -54,11 +54,11 @@ define i32 @abs_nonpoison(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) jayfoad wrote: What is this demonstrating? 2 does not seem like the right cost for any VALU/SALU operation on v32i16. 
https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/99508 >From 1d99939c020aab8650cd20df24e0b1e71726ae90 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Wed, 17 Jul 2024 13:26:09 +0100 Subject: [PATCH 1/3] [MLIR][OpenMP] Automate operand structure definition This patch adds the "gen-openmp-clause-ops" `mlir-tblgen` generator to produce the structure definitions previously in OpenMPClauseOperands.h automatically from the information contained in OpenMPOps.td and OpenMPClauses.td. Changes introduced to the `ElementsAttrBase` common tablegen class, as well as some of its subclasses, add more fine-grained information on their shape and type of their elements. This information is needed in order to properly generate the corresponding types to represent these attributes within the produced operand structures. The original header is maintained to enable the definition of similar structures that are not directly related to any single `OpenMP_Clause` or `OpenMP_Op` tablegen definition. 
--- .../mlir/Dialect/OpenMP/CMakeLists.txt| 1 + .../Dialect/OpenMP/OpenMPClauseOperands.h | 290 +- mlir/include/mlir/IR/CommonAttrConstraints.td | 18 +- mlir/test/mlir-tblgen/openmp-clause-ops.td| 78 + mlir/tools/mlir-tblgen/OmpOpGen.cpp | 174 ++- 5 files changed, 266 insertions(+), 295 deletions(-) create mode 100644 mlir/test/mlir-tblgen/openmp-clause-ops.td diff --git a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt index d3422f6e48b06..23ccba3067bcb 100644 --- a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt @@ -17,6 +17,7 @@ mlir_tablegen(OpenMPOpsDialect.h.inc -gen-dialect-decls -dialect=omp) mlir_tablegen(OpenMPOpsDialect.cpp.inc -gen-dialect-defs -dialect=omp) mlir_tablegen(OpenMPOps.h.inc -gen-op-decls) mlir_tablegen(OpenMPOps.cpp.inc -gen-op-defs) +mlir_tablegen(OpenMPClauseOps.h.inc -gen-openmp-clause-ops) mlir_tablegen(OpenMPOpsTypes.h.inc -gen-typedef-decls -typedefs-dialect=omp) mlir_tablegen(OpenMPOpsTypes.cpp.inc -gen-typedef-defs -typedefs-dialect=omp) mlir_tablegen(OpenMPOpsEnums.h.inc -gen-enum-decls) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index f4a87d52a172e..e5b4de4908966 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -23,303 +23,31 @@ #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc" +#include "mlir/Dialect/OpenMP/OpenMPClauseOps.h.inc" + namespace mlir { namespace omp { //===--===// -// Mixin structures defining MLIR operands associated with each OpenMP clause. +// Extra clause operand structures. 
//===--===// -struct AlignedClauseOps { - llvm::SmallVector alignedVars; - llvm::SmallVector alignments; -}; - -struct AllocateClauseOps { - llvm::SmallVector allocateVars, allocatorVars; -}; - -struct CancelDirectiveNameClauseOps { - ClauseCancellationConstructTypeAttr cancelDirective; -}; - -struct CollapseClauseOps { - llvm::SmallVector collapseLowerBound, collapseUpperBound, collapseStep; -}; - -struct CopyprivateClauseOps { - llvm::SmallVector copyprivateVars; - llvm::SmallVector copyprivateSyms; -}; - -struct CriticalNameClauseOps { - StringAttr symName; -}; - -struct DependClauseOps { - llvm::SmallVector dependKinds; - llvm::SmallVector dependVars; -}; - -struct DeviceClauseOps { - Value device; -}; - struct DeviceTypeClauseOps { // The default capture type. DeclareTargetDeviceType deviceType = DeclareTargetDeviceType::any; }; -struct DistScheduleClauseOps { - UnitAttr distScheduleStatic; - Value distScheduleChunkSize; -}; - -struct DoacrossClauseOps { - ClauseDependAttr doacrossDependType; - IntegerAttr doacrossNumLoops; - llvm::SmallVector doacrossDependVars; -}; - -struct FilterClauseOps { - Value filteredThreadId; -}; - -struct FinalClauseOps { - Value final; -}; - -struct GrainsizeClauseOps { - Value grainsize; -}; - -struct HasDeviceAddrClauseOps { - llvm::SmallVector hasDeviceAddrVars; -}; - -struct HintClauseOps { - IntegerAttr hint; -}; - -struct IfClauseOps { - Value ifVar; -}; - -struct InReductionClauseOps { - llvm::SmallVector inReductionVars; - llvm::SmallVector inReductionByref; - llvm::SmallVector inReductionSyms; -}; - -struct IsDevicePtrClauseOps { - llvm::SmallVector isDevicePtrVars; -}; - -struct LinearClauseOps { - llvm::SmallVector linearVars, linearStepVars; -}; - -struct LoopRelatedOps { - UnitAttr loopInclusive; -}; - -struct MapClauseOps { - llvm::SmallVector mapVars; -}; - -struct MergeableClauseOps { - UnitAttr mergeable; -}; - -struct NogroupClauseOps { - UnitAttr nogroup; -};
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)
@@ -12,11 +12,52 @@ #include "mlir/TableGen/GenInfo.h" +#include "mlir/TableGen/CodeGenHelpers.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" using namespace llvm; +/// The code block defining the base mixin class for combining clause operand +/// structures. +static const char *const baseMixinClass = R"( +namespace detail { +template <typename... Mixins> +struct Clauses : public Mixins... {}; +} // namespace detail +)"; + +/// The code block defining operation argument structures. +static const char *const operationArgStruct = R"( +using {0}Operands = detail::Clauses<{1}>; +)"; + +/// Remove multiple optional prefixes and suffixes from \c str. +/// +/// Prefixes and suffixes are attempted to be removed once in the order they +/// appear in the \c prefixes and \c suffixes arguments. All prefixes are +/// processed before suffixes are. This means it will behave as shown in the +/// following example: +/// - str: "PrePreNameSuf1Suf2" +/// - prefixes: ["Pre"] +/// - suffixes: ["Suf1", "Suf2"] +/// - return: "PreNameSuf1" +static StringRef stripPrefixAndSuffix(StringRef str, + llvm::ArrayRef<StringRef> prefixes, + llvm::ArrayRef<StringRef> suffixes) { + for (StringRef prefix : prefixes) +if (str.starts_with(prefix)) + str = str.substr(prefix.size()); + + for (StringRef suffix : suffixes) +if (str.ends_with(suffix)) + str = str.substr(0, str.size() - suffix.size()); skatrak wrote: Done. https://github.com/llvm/llvm-project/pull/99508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)
@@ -12,11 +12,52 @@ #include "mlir/TableGen/GenInfo.h" +#include "mlir/TableGen/CodeGenHelpers.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" using namespace llvm; +/// The code block defining the base mixin class for combining clause operand +/// structures. +static const char *const baseMixinClass = R"( +namespace detail { +template <typename... Mixins> +struct Clauses : public Mixins... {}; +} // namespace detail +)"; + +/// The code block defining operation argument structures. +static const char *const operationArgStruct = R"( +using {0}Operands = detail::Clauses<{1}>; +)"; + +/// Remove multiple optional prefixes and suffixes from \c str. +/// +/// Prefixes and suffixes are attempted to be removed once in the order they +/// appear in the \c prefixes and \c suffixes arguments. All prefixes are +/// processed before suffixes are. This means it will behave as shown in the +/// following example: +/// - str: "PrePreNameSuf1Suf2" +/// - prefixes: ["Pre"] +/// - suffixes: ["Suf1", "Suf2"] +/// - return: "PreNameSuf1" +static StringRef stripPrefixAndSuffix(StringRef str, + llvm::ArrayRef<StringRef> prefixes, + llvm::ArrayRef<StringRef> suffixes) { + for (StringRef prefix : prefixes) +if (str.starts_with(prefix)) + str = str.substr(prefix.size()); skatrak wrote: Done. https://github.com/llvm/llvm-project/pull/99508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR][OpenMP] Automate operand structure definition (PR #99508)
@@ -408,17 +408,26 @@ class ElementsAttrBase : let storageType = [{ ::mlir::ElementsAttr }]; let returnType = [{ ::mlir::ElementsAttr }]; let convertFromStorage = "$_self"; + + // The underlying C++ value type of each element. + string elementReturnType = ?; skatrak wrote: > I'm wary about making this kind of change in a widely shared file. Maybe we > could just handle this in OmpOpGen.cpp? Specifically, infer this information > in there based on the type of the attribute? Yes, this is something I tried to avoid as well. The problem is that the only existing attribute we could potentially use to get the element type information is the `returnType` inherited from `Attr`. We could potentially remove the "::llvm::ArrayRef<>" part of that string in the case of `DenseArrayAttrBase` and derived types, which doesn't seem like a very clean solution but it would work (as long as these subclasses/definitions don't override that property). For other subclasses of `ElementsAttrBase` we would have to accept having to use array-style attributes (e.g. `::mlir::DenseIntElementsAttr`) instead of lists of elements. I'd like to avoid hardcoding as many type names as possible in the new tablegen backend, since people could just create new general or OpenMP-specific attribute types and then it would have to be updated. I think it makes sense to specialize it for as few and as generic cases as we can get away with and just make sure they already contain the information we need. In this case, we're just missing the element type and rank of array attributes, which seems like something that could be of general use eventually. Having said that, this is just the approach that works that made the most sense to me, but I'm very much interested in discussing potentially better alternatives. > This may need wider support, specifically we may need to generate an accessor > function in .h.inc/.cpp.inc. 
Good point, I'll delay making this change until we decide whether we want to keep these new properties or not. https://github.com/llvm/llvm-project/pull/99508 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang][headers] Including stddef.h always redefines NULL (#99727) (PR #100191)
https://github.com/AaronBallman approved this pull request. LGTM, I think the CI failures are unrelated to this patch. https://github.com/llvm/llvm-project/pull/100191 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] cf55425 - Revert "MTM: fix issues after cursory reading (#100404)"
Author: Mehdi Amini Date: 2024-07-25T14:06:44+02:00 New Revision: cf55425639c201d8b3a80541bc222934485d5eda URL: https://github.com/llvm/llvm-project/commit/cf55425639c201d8b3a80541bc222934485d5eda DIFF: https://github.com/llvm/llvm-project/commit/cf55425639c201d8b3a80541bc222934485d5eda.diff LOG: Revert "MTM: fix issues after cursory reading (#100404)" This reverts commit 0760aec54ca6f680f4786c4fc3bbae8f500deeab. Added: Modified: llvm/lib/CodeGen/MachineTraceMetrics.cpp Removed: diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index dd1faff355b52..bf3add010574b 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -24,11 +24,17 @@ #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include using namespace llvm; @@ -127,7 +133,7 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { // Scale the resource cycles so they are comparable. 
unsigned PROffset = MBB->getNumber() * PRKinds; - for (unsigned K = 0; K < PRKinds; ++K) + for (unsigned K = 0; K != PRKinds; ++K) ProcReleaseAtCycles[PROffset + K] = PRCycles[K] * SchedModel.getResourceFactor(K); @@ -140,14 +146,15 @@ MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const { "getResources() must be called before getProcReleaseAtCycles()"); unsigned PRKinds = SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size()); - return ArrayRef{ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds}; + return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds); } //===--===// // Ensemble utility functions //===--===// -MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *CT) : MTM(*CT) { +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { BlockInfo.resize(MTM.BlockInfo.size()); unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); @@ -191,7 +198,7 @@ computeDepthResources(const MachineBasicBlock *MBB) { // Compute per-resource depths. ArrayRef PredPRDepths = getProcResourceDepths(PredNum); ArrayRef PredPRCycles = MTM.getProcReleaseAtCycles(PredNum); - for (unsigned K = 0; K < PRKinds; ++K) + for (unsigned K = 0; K != PRKinds; ++K) ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; } @@ -224,7 +231,7 @@ computeHeightResources(const MachineBasicBlock *MBB) { // Compute per-resource heights. 
ArrayRef SuccPRHeights = getProcResourceHeights(SuccNum); - for (unsigned K = 0; K < PRKinds; ++K) + for (unsigned K = 0; K != PRKinds; ++K) ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K]; } @@ -257,7 +264,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceDepths(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); - return ArrayRef{ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds}; + return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); } /// Get an array of processor resource heights for MBB. Indexed by processor @@ -270,7 +277,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceHeights(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); - return ArrayRef{ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds}; + return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); } //===--===// @@ -307,8 +314,8 @@ class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; public: - MinInstrCountEnsemble(MachineTraceMetrics *MTM) - : MachineTraceMetrics::Ensemble(MTM) {} + MinInstrCountEnsemble(MachineTraceMetrics *mtm) +: MachineTraceMetrics::Ensemble(mtm) {} }; /// Pick only the current basic block for the trace and do not choose any @@ -388,15 +395,15 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { // Get an Ensemble sub-class for the requested trace strategy. MachineTraceMetrics::Ensemble * -MachineTraceMetrics::getEnsemble(MachineTraceStrat
[llvm-branch-commits] [lldb] b851520 - Revert "[lldb] Fix incorrect uses of logical operator in 'if' condition check…"
Author: David Spickett Date: 2024-07-25T13:16:55+01:00 New Revision: b85152008f41e8136f5e21db875a63b464f8c10f URL: https://github.com/llvm/llvm-project/commit/b85152008f41e8136f5e21db875a63b464f8c10f DIFF: https://github.com/llvm/llvm-project/commit/b85152008f41e8136f5e21db875a63b464f8c10f.diff LOG: Revert "[lldb] Fix incorrect uses of logical operator in 'if' condition check…" This reverts commit 2ba3fe7356f065757a2279f65e4ef5c8f1476293. Added: Modified: lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h Removed: diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h index 2667f73516ba3..e1a3156d10afd 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/Interfaces/ScriptedPythonInterface.h @@ -85,15 +85,14 @@ class ScriptedPythonInterface : virtual public ScriptedInterface { bool has_class_name = !class_name.empty(); bool has_interpreter_dict = !(llvm::StringRef(m_interpreter.GetDictionaryName()).empty()); - -if (!has_class_name) - return create_error("Missing script class name."); - -if (!has_interpreter_dict) - return create_error("Invalid script interpreter dictionary."); - -if (!script_obj) - return create_error("Missing scripting object."); +if (!has_class_name && !has_interpreter_dict && !script_obj) { + if (!has_class_name) +return create_error("Missing script class name."); + else if (!has_interpreter_dict) +return create_error("Invalid script interpreter dictionary."); + else +return create_error("Missing scripting object."); +} Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN, Locker::FreeLock); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)
@@ -1967,22 +2047,13 @@ splitCoroutine(Function &F, SmallVectorImpl &Clones, for (DbgVariableRecord *DVR : DbgVariableRecords) coro::salvageDebugInfo(ArgToAllocaMap, *DVR, Shape.OptimizeFrame, false /*UseEntryValue*/); - return Shape; -} -/// Remove calls to llvm.coro.end in the original function. -static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { - if (Shape.ABI != coro::ABI::Switch) { -for (auto *End : Shape.CoroEnds) { - replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); -} - } else { -for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { - auto &Context = End->getContext(); - End->replaceAllUsesWith(ConstantInt::getFalse(Context)); - End->eraseFromParent(); -} + removeCoroEndsFromRampFunction(Shape); + + if (!isNoSuspendCoroutine && Shape.ABI == coro::ABI::Switch) { vogelsgesang wrote: Yes, I would be in favor of adding a second attribute. What do you & others think? CC @ChuanqiXu9 https://github.com/llvm/llvm-project/pull/99283 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/100587 Backport 98e733e Requested by: @tblah >From 6867a167f40aacd653e56a3fab08bc52797f46cd Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 25 Jul 2024 16:53:27 +0100 Subject: [PATCH] [flang][OpenMP] Initialize privatised derived type variables (#100417) Fixes #91928 (cherry picked from commit 98e733eaf2af1a5c1d9392e279d21182ffdf560d) --- flang/include/flang/Lower/ConvertVariable.h | 8 flang/lib/Lower/ConvertVariable.cpp | 23 - .../lib/Lower/OpenMP/DataSharingProcessor.cpp | 6 +++ .../Lower/OpenMP/private-derived-type.f90 | 47 +++ 4 files changed, 73 insertions(+), 11 deletions(-) create mode 100644 flang/test/Lower/OpenMP/private-derived-type.f90 diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h index 515f4695951b4..de394a39e112e 100644 --- a/flang/include/flang/Lower/ConvertVariable.h +++ b/flang/include/flang/Lower/ConvertVariable.h @@ -62,6 +62,14 @@ using AggregateStoreMap = llvm::DenseMap; void instantiateVariable(AbstractConverter &, const pft::Variable &var, SymMap &symMap, AggregateStoreMap &storeMap); +/// Does this variable have a default initialization? +bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym); + +/// Call default initialization runtime routine to initialize \p var. +void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter, +const Fortran::semantics::Symbol &sym, +Fortran::lower::SymMap &symMap); + /// Create a fir::GlobalOp given a module variable definition. This is intended /// to be used when lowering a module definition, not when lowering variables /// used from a module. 
For used variables instantiateVariable must directly be diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 47ad48fb322cc..4fcfa0b126e04 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -72,7 +72,8 @@ static mlir::Value genScalarValue(Fortran::lower::AbstractConverter &converter, } /// Does this variable have a default initialization? -static bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym) { +bool Fortran::lower::hasDefaultInitialization( +const Fortran::semantics::Symbol &sym) { if (sym.has() && sym.size()) if (!Fortran::semantics::IsAllocatableOrPointer(sym)) if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType()) @@ -353,7 +354,7 @@ static mlir::Value genComponentDefaultInit( // global constructor since this has no runtime cost. componentValue = fir::factory::createUnallocatedBox( builder, loc, componentTy, std::nullopt); -} else if (hasDefaultInitialization(component)) { +} else if (Fortran::lower::hasDefaultInitialization(component)) { // Component type has default initialization. componentValue = genDefaultInitializerValue(converter, loc, component, componentTy, stmtCtx); @@ -556,7 +557,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, builder.createConvert(loc, symTy, fir::getBase(initVal)); builder.create(loc, castTo); }); -} else if (hasDefaultInitialization(sym)) { +} else if (Fortran::lower::hasDefaultInitialization(sym)) { Fortran::lower::createGlobalInitialization( builder, global, [&](fir::FirOpBuilder &builder) { Fortran::lower::StatementContext stmtCtx( @@ -752,17 +753,15 @@ mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) { return true; // Local variables (including function results), and intent(out) dummies must // be default initialized at runtime if their type has default initialization. 
- return hasDefaultInitialization(sym); + return Fortran::lower::hasDefaultInitialization(sym); } /// Call default initialization runtime routine to initialize \p var. -static void -defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter, - const Fortran::lower::pft::Variable &var, - Fortran::lower::SymMap &symMap) { +void Fortran::lower::defaultInitializeAtRuntime( +Fortran::lower::AbstractConverter &converter, +const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Location loc = converter.getCurrentLocation(); - const Fortran::semantics::Symbol &sym = var.getSymbol(); fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap); if (Fortran::semantics::IsOptional(sym)) { // 15.5.2.12 point 3, absent optional dummies are not initialized. @@ -927,7 +926,8 @@ static void instantiateLocal(Fortran
[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/100587 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)
llvmbot wrote: @jeanPerier What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/100587 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] release/19.x: [flang][OpenMP] Initialize privatised derived type variables (#100417) (PR #100587)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: None (llvmbot) Changes Backport 98e733e Requested by: @tblah --- Full diff: https://github.com/llvm/llvm-project/pull/100587.diff 4 Files Affected: - (modified) flang/include/flang/Lower/ConvertVariable.h (+8) - (modified) flang/lib/Lower/ConvertVariable.cpp (+12-11) - (modified) flang/lib/Lower/OpenMP/DataSharingProcessor.cpp (+6) - (added) flang/test/Lower/OpenMP/private-derived-type.f90 (+47) ``diff diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h index 515f4695951b4..de394a39e112e 100644 --- a/flang/include/flang/Lower/ConvertVariable.h +++ b/flang/include/flang/Lower/ConvertVariable.h @@ -62,6 +62,14 @@ using AggregateStoreMap = llvm::DenseMap; void instantiateVariable(AbstractConverter &, const pft::Variable &var, SymMap &symMap, AggregateStoreMap &storeMap); +/// Does this variable have a default initialization? +bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym); + +/// Call default initialization runtime routine to initialize \p var. +void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter, +const Fortran::semantics::Symbol &sym, +Fortran::lower::SymMap &symMap); + /// Create a fir::GlobalOp given a module variable definition. This is intended /// to be used when lowering a module definition, not when lowering variables /// used from a module. For used variables instantiateVariable must directly be diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 47ad48fb322cc..4fcfa0b126e04 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -72,7 +72,8 @@ static mlir::Value genScalarValue(Fortran::lower::AbstractConverter &converter, } /// Does this variable have a default initialization? 
-static bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym) { +bool Fortran::lower::hasDefaultInitialization( +const Fortran::semantics::Symbol &sym) { if (sym.has() && sym.size()) if (!Fortran::semantics::IsAllocatableOrPointer(sym)) if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType()) @@ -353,7 +354,7 @@ static mlir::Value genComponentDefaultInit( // global constructor since this has no runtime cost. componentValue = fir::factory::createUnallocatedBox( builder, loc, componentTy, std::nullopt); -} else if (hasDefaultInitialization(component)) { +} else if (Fortran::lower::hasDefaultInitialization(component)) { // Component type has default initialization. componentValue = genDefaultInitializerValue(converter, loc, component, componentTy, stmtCtx); @@ -556,7 +557,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, builder.createConvert(loc, symTy, fir::getBase(initVal)); builder.create(loc, castTo); }); -} else if (hasDefaultInitialization(sym)) { +} else if (Fortran::lower::hasDefaultInitialization(sym)) { Fortran::lower::createGlobalInitialization( builder, global, [&](fir::FirOpBuilder &builder) { Fortran::lower::StatementContext stmtCtx( @@ -752,17 +753,15 @@ mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) { return true; // Local variables (including function results), and intent(out) dummies must // be default initialized at runtime if their type has default initialization. - return hasDefaultInitialization(sym); + return Fortran::lower::hasDefaultInitialization(sym); } /// Call default initialization runtime routine to initialize \p var. 
-static void -defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter, - const Fortran::lower::pft::Variable &var, - Fortran::lower::SymMap &symMap) { +void Fortran::lower::defaultInitializeAtRuntime( +Fortran::lower::AbstractConverter &converter, +const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Location loc = converter.getCurrentLocation(); - const Fortran::semantics::Symbol &sym = var.getSymbol(); fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap); if (Fortran::semantics::IsOptional(sym)) { // 15.5.2.12 point 3, absent optional dummies are not initialized. @@ -927,7 +926,8 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, if (needDummyIntentoutFinalization(var)) finalizeAtRuntime(converter, var, symMap); if (mustBeDefaultInitializedAtRuntime(var)) -defaultInitializeAtRuntime(converter, var, symMap); +Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/100590 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/100590 Backport 3295d377f37a60597321f502d164b5d6b1948e28 Requested by: @Endilll >From 3c7695ec2681c3ca531b5ce2a2fd20b8301df0b5 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Thu, 25 Jul 2024 20:15:14 +0400 Subject: [PATCH] [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) `__is_layout_compatible` was added in Clang 19 (#81506), and at that time it wasn't entirely clear whether it should be a revertible type trait or not. We decided to follow the example of other type traits. Since then #95969 happened, and now we know that we don't want new revertible type traits. This patch removes `__is_layout_compatible` from revertible type traits list, and leaves a comment what revertible type traits are, and that new type traits should not be added there. The intention is to also cherry-pick this to 19 branch. (cherry picked from commit 3295d377f37a60597321f502d164b5d6b1948e28) --- clang/lib/Parse/ParseExpr.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0a017ae79de75..e82b565272831 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -763,6 +763,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II, tok::TokenKind *Kind) { if (RevertibleTypeTraits.empty()) { +// Revertible type trait is a feature for backwards compatibility with older +// standard libraries that declare their own structs with the same name as +// the builtins listed below. New builtins should NOT be added to this list. 
#define RTT_JOIN(X, Y) X##Y #define REVERTIBLE_TYPE_TRAIT(Name) \ RevertibleTypeTraits[PP.getIdentifierInfo(#Name)] = RTT_JOIN(tok::kw_, Name) @@ -790,7 +793,6 @@ bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II, REVERTIBLE_TYPE_TRAIT(__is_fundamental); REVERTIBLE_TYPE_TRAIT(__is_integral); REVERTIBLE_TYPE_TRAIT(__is_interface_class); -REVERTIBLE_TYPE_TRAIT(__is_layout_compatible); REVERTIBLE_TYPE_TRAIT(__is_literal); REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr); REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)
llvmbot wrote: @cor3ntin What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/100590 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 3295d377f37a60597321f502d164b5d6b1948e28 Requested by: @Endilll --- Full diff: https://github.com/llvm/llvm-project/pull/100590.diff 1 Files Affected: - (modified) clang/lib/Parse/ParseExpr.cpp (+3-1) ``diff diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0a017ae79de75..e82b565272831 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -763,6 +763,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II, tok::TokenKind *Kind) { if (RevertibleTypeTraits.empty()) { +// Revertible type trait is a feature for backwards compatibility with older +// standard libraries that declare their own structs with the same name as +// the builtins listed below. New builtins should NOT be added to this list. #define RTT_JOIN(X, Y) X##Y #define REVERTIBLE_TYPE_TRAIT(Name) \ RevertibleTypeTraits[PP.getIdentifierInfo(#Name)] = RTT_JOIN(tok::kw_, Name) @@ -790,7 +793,6 @@ bool Parser::isRevertibleTypeTrait(const IdentifierInfo *II, REVERTIBLE_TYPE_TRAIT(__is_fundamental); REVERTIBLE_TYPE_TRAIT(__is_integral); REVERTIBLE_TYPE_TRAIT(__is_interface_class); -REVERTIBLE_TYPE_TRAIT(__is_layout_compatible); REVERTIBLE_TYPE_TRAIT(__is_literal); REVERTIBLE_TYPE_TRAIT(__is_lvalue_expr); REVERTIBLE_TYPE_TRAIT(__is_lvalue_reference); `` https://github.com/llvm/llvm-project/pull/100590 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Remove `__is_layout_compatible` from revertible type traits list (#100572) (PR #100590)
https://github.com/cor3ntin approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/100590 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/99891 >From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 22 Jul 2024 11:56:23 -0700 Subject: [PATCH 01/22] Changed assignment of profiles with pseudo probe index Created using spr 1.3.4 --- bolt/lib/Profile/StaleProfileMatching.cpp | 85 +++ .../X86/match-blocks-with-pseudo-probes.test | 25 ++ 2 files changed, 78 insertions(+), 32 deletions(-) diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 4105f626fb5b6..c135ee5ff4837 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -195,11 +195,15 @@ class StaleMatcher { void init(const std::vector &Blocks, const std::vector &Hashes, const std::vector &CallHashes, -std::optional YamlBFGUID) { +const std::unordered_map> +IndexToBinaryPseudoProbes, +const std::unordered_map +BinaryPseudoProbeToBlock, +const uint64_t YamlBFGUID) { assert(Blocks.size() == Hashes.size() && Hashes.size() == CallHashes.size() && "incorrect matcher initialization"); - for (size_t I = 0; I < Blocks.size(); I++) { FlowBlock *Block = Blocks[I]; uint16_t OpHash = Hashes[I].OpcodeHash; @@ -209,6 +213,8 @@ class StaleMatcher { std::make_pair(Hashes[I], Block)); this->Blocks.push_back(Block); } +this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes; +this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock; this->YamlBFGUID = YamlBFGUID; } @@ -234,10 +240,14 @@ class StaleMatcher { using HashBlockPairType = std::pair; std::unordered_map> OpHashToBlocks; std::unordered_map> CallHashToBlocks; - std::vector Blocks; + std::unordered_map> + IndexToBinaryPseudoProbes; + std::unordered_map + BinaryPseudoProbeToBlock; + std::vector Blocks; // If the pseudo probe checksums of the profiled and binary functions are // equal, then the YamlBF's GUID is defined and used to match blocks. 
- std::optional YamlBFGUID; + uint64_t YamlBFGUID; // Uses OpcodeHash to find the most similar block for a given hash. const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const { @@ -284,7 +294,7 @@ class StaleMatcher { // Searches for the pseudo probe attached to the matched function's block, // ignoring pseudo probes attached to function calls and inlined functions' // blocks. -outs() << "match with pseudo probes\n"; +std::vector BlockPseudoProbes; for (const auto &PseudoProbe : PseudoProbes) { // Ensures that pseudo probe information belongs to the appropriate // function and not an inlined function. @@ -293,11 +303,30 @@ class StaleMatcher { // Skips pseudo probes attached to function calls. if (PseudoProbe.Type != static_cast(PseudoProbeType::Block)) continue; - assert(PseudoProbe.Index < Blocks.size() && - "pseudo probe index out of range"); - return Blocks[PseudoProbe.Index]; + + BlockPseudoProbes.push_back(&PseudoProbe); } -return nullptr; + +// Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo +// probe and binary pseudo probe. 
+if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1) + return nullptr; + +uint64_t Index = BlockPseudoProbes[0]->Index; +assert(Index < Blocks.size() && "Invalid pseudo probe index"); + +auto It = IndexToBinaryPseudoProbes.find(Index); +assert(It != IndexToBinaryPseudoProbes.end() && + "All blocks should have a pseudo probe"); +if (It->second.size() > 1) + return nullptr; + +const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0]; +auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe); +assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() && + "All binary pseudo probes should belong a binary basic block"); + +return BinaryPseudoProbeIt->second; } }; @@ -491,6 +520,11 @@ size_t matchWeightsByHashes( std::vector CallHashes; std::vector Blocks; std::vector BlendedHashes; + std::unordered_map> + IndexToBinaryPseudoProbes; + std::unordered_map + BinaryPseudoProbeToBlock; + const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder(); for (uint64_t I = 0; I < BlockOrder.size(); I++) { const BinaryBasicBlock *BB = BlockOrder[I]; assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock"); @@ -510,9 +544,27 @@ size_t matchWeightsByHashes( Blocks.push_back(&Func.Blocks[I + 1]); BlendedBlockHash BlendedHash(BB->getHash()); BlendedHashes.push_back(BlendedHash); +if (PseudoProbeDecoder) { + const AddressProbesMap &ProbeMap = + PseudoProbeDecoder->get
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/100604 Backport a55df237375e98cfc2520d5eb1a23b302ef02ba0 Requested by: @ldionne >From 342755c855bbc6873b4677a74812759c6a80cdae Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 25 Jul 2024 12:16:48 -0500 Subject: [PATCH] [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) The `<locale>` header uses `strtoll_l` and friends which are defined in `<xlocale.h>` on these platforms. While this works via transitive includes when modules are disabled, this doesn't work anymore if the platforms are modularized properly. (cherry picked from commit a55df237375e98cfc2520d5eb1a23b302ef02ba0) --- libcxx/include/locale | 4 1 file changed, 4 insertions(+) diff --git a/libcxx/include/locale b/libcxx/include/locale index dbec23a2c936d..573910a85bef5 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -232,6 +232,10 @@ template class messages_byname; #include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> # endif +# if defined(__APPLE__) || defined(__FreeBSD__) +#include <xlocale.h> +# endif + # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header # endif ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/100604 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)
llvmbot wrote: @philnik777 What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/100604 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)
llvmbot wrote: @llvm/pr-subscribers-libcxx Author: None (llvmbot) Changes Backport a55df237375e98cfc2520d5eb1a23b302ef02ba0 Requested by: @ldionne --- Full diff: https://github.com/llvm/llvm-project/pull/100604.diff 1 Files Affected: - (modified) libcxx/include/locale (+4) ``diff diff --git a/libcxx/include/locale b/libcxx/include/locale index dbec23a2c936d..573910a85bef5 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -232,6 +232,10 @@ template class messages_byname; #include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> # endif +# if defined(__APPLE__) || defined(__FreeBSD__) +#include <xlocale.h> +# endif + # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header # endif `` https://github.com/llvm/llvm-project/pull/100604 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100522 >From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:47:03 +0400 Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics --- llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 + 1 file changed, 310 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..f65615b07abc0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,310 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END. 
+ +define void @abs_nonpoison() { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 4
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100522 >From df2b6b7c749629f0ea50f7772329b48ba9450f2f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:47:03 +0400 Subject: [PATCH] AMDGPU: Add baseline test for cost of abs intrinsics --- llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 310 + 1 file changed, 310 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/abs.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll new file mode 100644 index 0..f65615b07abc0 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -0,0 +1,310 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s +; RUN: opt -passes="print" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s +; END. 
+ +define void @abs_nonpoison() { +; FAST-LABEL: 'abs_nonpoison' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 4
[llvm-branch-commits] [llvm] TTI: Fix special casing vectorization costs of saturating add/sub (PR #97463)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/97463 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of min/max ISD nodes (PR #100514)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100514 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From 5a2e8acf2b7e4aafae237a035f81557d97948a29 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 +- .../Analysis/CostModel/X86/arith-overflow.ll | 120 +- 2 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index c5da46af04367..28d53042d4c21 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 f
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From 39ca2c43676bf82f97f8cce2e09091e7d849dfab Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 85c14e04d3e27c8609fac2890eb475963d7f008b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 + llvm/test/Analysis/CostModel/AMDGPU/abs.ll | 40 +++--- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..e290f0631ff16 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -24,11 +24,11 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) -; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/99891 >From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 22 Jul 2024 11:56:23 -0700 Subject: [PATCH 01/23] Changed assignment of profiles with pseudo probe index Created using spr 1.3.4 --- bolt/lib/Profile/StaleProfileMatching.cpp | 85 +++ .../X86/match-blocks-with-pseudo-probes.test | 25 ++ 2 files changed, 78 insertions(+), 32 deletions(-) diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 4105f626fb5b6..c135ee5ff4837 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -195,11 +195,15 @@ class StaleMatcher { void init(const std::vector &Blocks, const std::vector &Hashes, const std::vector &CallHashes, -std::optional YamlBFGUID) { +const std::unordered_map> +IndexToBinaryPseudoProbes, +const std::unordered_map +BinaryPseudoProbeToBlock, +const uint64_t YamlBFGUID) { assert(Blocks.size() == Hashes.size() && Hashes.size() == CallHashes.size() && "incorrect matcher initialization"); - for (size_t I = 0; I < Blocks.size(); I++) { FlowBlock *Block = Blocks[I]; uint16_t OpHash = Hashes[I].OpcodeHash; @@ -209,6 +213,8 @@ class StaleMatcher { std::make_pair(Hashes[I], Block)); this->Blocks.push_back(Block); } +this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes; +this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock; this->YamlBFGUID = YamlBFGUID; } @@ -234,10 +240,14 @@ class StaleMatcher { using HashBlockPairType = std::pair; std::unordered_map> OpHashToBlocks; std::unordered_map> CallHashToBlocks; - std::vector Blocks; + std::unordered_map> + IndexToBinaryPseudoProbes; + std::unordered_map + BinaryPseudoProbeToBlock; + std::vector Blocks; // If the pseudo probe checksums of the profiled and binary functions are // equal, then the YamlBF's GUID is defined and used to match blocks. 
- std::optional YamlBFGUID; + uint64_t YamlBFGUID; // Uses OpcodeHash to find the most similar block for a given hash. const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const { @@ -284,7 +294,7 @@ class StaleMatcher { // Searches for the pseudo probe attached to the matched function's block, // ignoring pseudo probes attached to function calls and inlined functions' // blocks. -outs() << "match with pseudo probes\n"; +std::vector BlockPseudoProbes; for (const auto &PseudoProbe : PseudoProbes) { // Ensures that pseudo probe information belongs to the appropriate // function and not an inlined function. @@ -293,11 +303,30 @@ class StaleMatcher { // Skips pseudo probes attached to function calls. if (PseudoProbe.Type != static_cast(PseudoProbeType::Block)) continue; - assert(PseudoProbe.Index < Blocks.size() && - "pseudo probe index out of range"); - return Blocks[PseudoProbe.Index]; + + BlockPseudoProbes.push_back(&PseudoProbe); } -return nullptr; + +// Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo +// probe and binary pseudo probe. 
+if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1) + return nullptr; + +uint64_t Index = BlockPseudoProbes[0]->Index; +assert(Index < Blocks.size() && "Invalid pseudo probe index"); + +auto It = IndexToBinaryPseudoProbes.find(Index); +assert(It != IndexToBinaryPseudoProbes.end() && + "All blocks should have a pseudo probe"); +if (It->second.size() > 1) + return nullptr; + +const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0]; +auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe); +assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() && + "All binary pseudo probes should belong a binary basic block"); + +return BinaryPseudoProbeIt->second; } }; @@ -491,6 +520,11 @@ size_t matchWeightsByHashes( std::vector CallHashes; std::vector Blocks; std::vector BlendedHashes; + std::unordered_map> + IndexToBinaryPseudoProbes; + std::unordered_map + BinaryPseudoProbeToBlock; + const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder(); for (uint64_t I = 0; I < BlockOrder.size(); I++) { const BinaryBasicBlock *BB = BlockOrder[I]; assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock"); @@ -510,9 +544,27 @@ size_t matchWeightsByHashes( Blocks.push_back(&Func.Blocks[I + 1]); BlendedBlockHash BlendedHash(BB->getHash()); BlendedHashes.push_back(BlendedHash); +if (PseudoProbeDecoder) { + const AddressProbesMap &ProbeMap = + PseudoProbeDecoder->get
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -478,10 +605,31 @@ matchWeightsByHashes(BinaryContext &BC, Blocks.push_back(&Func.Blocks[I + 1]); BlendedBlockHash BlendedHash(BB->getHash()); BlendedHashes.push_back(BlendedHash); +// Collects pseudo probes attached to the BB for use in the StaleMatcher. +if (opts::ProfileUsePseudoProbes && PseudoProbeDecoder) { + const AddressProbesMap &ProbeMap = + PseudoProbeDecoder->getAddress2ProbesMap(); + const uint64_t FuncAddr = BF.getAddress(); + const std::pair &BlockRange = + BB->getInputAddressRange(); + const auto &BlockProbes = + llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first), + ProbeMap.lower_bound(FuncAddr + BlockRange.second)); + for (const auto &[_, Probes] : BlockProbes) { +for (const MCDecodedPseudoProbe &Probe : Probes) { + if (Probe.getInlineTreeNode()->hasInlineSite()) shawbyoung wrote: Just added inlined block pseudo probe matching. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 949edfeeecddb315bf95dd82be99c57a4711c30a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- 3 files changed, 210 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [<](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instructi
[llvm-branch-commits] [libcxx] release/19.x: [libc++] Add missing xlocale.h include on Apple and FreeBSD (#99689) (PR #100604)
https://github.com/philnik777 approved this pull request. https://github.com/llvm/llvm-project/pull/100604 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -478,10 +675,34 @@ matchWeightsByHashes(BinaryContext &BC, Blocks.push_back(&Func.Blocks[I + 1]); BlendedBlockHash BlendedHash(BB->getHash()); BlendedHashes.push_back(BlendedHash); +// Collects pseudo probes attached to the BB for use in the StaleMatcher. +if (opts::ProfileUsePseudoProbes && +opts::StaleMatchingWithBlockPseudoProbes && PseudoProbeDecoder) { + const AddressProbesMap &ProbeMap = + PseudoProbeDecoder->getAddress2ProbesMap(); + const uint64_t FuncAddr = BF.getAddress(); + const std::pair &BlockRange = + BB->getInputAddressRange(); + const auto &BlockProbes = + llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first), + ProbeMap.lower_bound(FuncAddr + BlockRange.second)); + for (const auto &[_, Probes] : BlockProbes) { +for (const MCDecodedPseudoProbe &Probe : Probes) { + if (Probe.getType() != static_cast(PseudoProbeType::Block)) +continue; + if (Probe.getInlineTreeNode()->hasInlineSite()) +Matcher.mapGUIDAndIndexToProbe(Probe.getGuid(), Probe.getIndex(), + &Probe); + else +Matcher.mapIndexToProbe(Probe.getIndex(), &Probe); wlei-llvm wrote: Wondering why we need to use two containers? iiuc, all the probes contain the `GUID`, the top-level probes should contain the function GUID. https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -0,0 +1,62 @@ +## Tests stale block matching with pseudo probes. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \ +# RUN: --print-cfg --funcs=main --profile-ignore-hash=0 --infer-stale-profile --profile-use-pseudo-probes --stale-matching-with-block-pseudo-probes 2>&1 | FileCheck %s + +# CHECK: BOLT-INFO: inference found a pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale) + +#--- main.s + .text + .globl main# -- Begin function main + .p2align4, 0x90 + .type main,@function +main: # @main +# %bb.0: + pushq %rbp + movq%rsp, %rbp + movl$0, -4(%rbp) + .pseudoprobe15822663052811949562 1 0 0 main wlei-llvm wrote: Consider to add inlining case? (I guess one big reason we want to use pseudo-probe is to deal with inlining) https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -116,6 +118,11 @@ cl::opt StaleMatchingCostJumpUnknownFTInc( "The cost of increasing an unknown fall-through jump count by one."), cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory)); +cl::opt StaleMatchingWithBlockPseudoProbes( +"stale-matching-with-block-pseudo-probes", +cl::desc("Turns on stale matching with block pseudo probes."), cl::init(3), wlei-llvm wrote: `cl::init(3)`: should this be a bool value? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
@@ -266,6 +325,123 @@ class StaleMatcher { } return BestBlock; } + + /// A helper function for logging. + static bool LogErrIfExpr(bool Expr, std::string Message) { wlei-llvm wrote: Nit: can this `std::string` be a `StringRef`? https://github.com/llvm/llvm-project/pull/99891 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] DAG: Lower single infinity is.fpclass tests to fcmp (PR #100380)
spavloff wrote: Just as with #100378, the changes in the tests demonstrate that the produced code becomes worse. In what cases does this patch make an improvement? Can it be limited to such cases? https://github.com/llvm/llvm-project/pull/100380 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)
https://github.com/bogner created https://github.com/llvm/llvm-project/pull/100622 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
https://github.com/bogner created https://github.com/llvm/llvm-project/pull/100623 This makes the binding structure in a DXILResource default to empty and need a separate call to set up, and also moves the unique ID into it since bindings are the only place where those are actually used. This will put us in a better position when dealing with resource handles in libraries. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Justin Bogner (bogner) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/100622.diff 2 Files Affected: - (modified) llvm/include/llvm/Analysis/DXILResource.h (+2-1) - (modified) llvm/lib/Analysis/DXILResource.cpp (+1) ``diff diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index ac1cefd98dbe3..cca5e0f0bd759 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -9,11 +9,12 @@ #ifndef LLVM_ANALYSIS_DXILRESOURCE_H #define LLVM_ANALYSIS_DXILRESOURCE_H -#include "llvm/IR/Metadata.h" #include "llvm/IR/Value.h" #include "llvm/Support/DXILABI.h" namespace llvm { +class MDTuple; + namespace dxil { struct ResourceBinding { diff --git a/llvm/lib/Analysis/DXILResource.cpp b/llvm/lib/Analysis/DXILResource.cpp index cbe634c4b91aa..d47a73c05a3e5 100644 --- a/llvm/lib/Analysis/DXILResource.cpp +++ b/llvm/lib/Analysis/DXILResource.cpp @@ -9,6 +9,7 @@ #include "llvm/Analysis/DXILResource.h" #include "llvm/ADT/APInt.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Metadata.h" using namespace llvm; using namespace dxil; `` https://github.com/llvm/llvm-project/pull/100622 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Justin Bogner (bogner) Changes This makes the binding structure in a DXILResource default to empty and need a separate call to set up, and also moves the unique ID into it since bindings are the only place where those are actually used. This will put us in a better position when dealing with resource handles in libraries. --- Patch is 27.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100623.diff 3 Files Affected: - (modified) llvm/include/llvm/Analysis/DXILResource.h (+41-47) - (modified) llvm/lib/Analysis/DXILResource.cpp (+35-57) - (modified) llvm/unittests/Analysis/DXILResourceTest.cpp (+39-39) ``diff diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index cca5e0f0bd759..d4006ae10837c 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -17,19 +17,22 @@ class MDTuple; namespace dxil { -struct ResourceBinding { - uint32_t Space; - uint32_t LowerBound; - uint32_t Size; - - bool operator==(const ResourceBinding &RHS) const { -return std::tie(Space, LowerBound, Size) == - std::tie(RHS.Space, RHS.LowerBound, RHS.Size); - } - bool operator!=(const ResourceBinding &RHS) const { return !(*this == RHS); } -}; - class ResourceInfo { + struct ResourceBinding { +uint32_t UniqueID; +uint32_t Space; +uint32_t LowerBound; +uint32_t Size; + +bool operator==(const ResourceBinding &RHS) const { + return std::tie(UniqueID, Space, LowerBound, Size) == + std::tie(RHS.UniqueID, RHS.Space, RHS.LowerBound, RHS.Size); +} +bool operator!=(const ResourceBinding &RHS) const { + return !(*this == RHS); +} + }; + struct UAVInfo { bool GloballyCoherent; bool HasCounter; @@ -81,12 +84,11 @@ class ResourceInfo { Value *Symbol; StringRef Name; - ResourceBinding Binding; - uint32_t UniqueID; - dxil::ResourceClass RC; dxil::ResourceKind Kind; + ResourceBinding Binding = {}; + // Resource class 
dependent properties. // CBuffer, Sampler, and RawBuffer end here. union { @@ -114,70 +116,62 @@ class ResourceInfo { bool isMultiSample() const; ResourceInfo(dxil::ResourceClass RC, dxil::ResourceKind Kind, Value *Symbol, - StringRef Name, ResourceBinding Binding, uint32_t UniqueID) - : Symbol(Symbol), Name(Name), Binding(Binding), UniqueID(UniqueID), -RC(RC), Kind(Kind) {} + StringRef Name) + : Symbol(Symbol), Name(Name), RC(RC), Kind(Kind) {} public: static ResourceInfo SRV(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, dxil::ElementType ElementTy, uint32_t ElementCount, dxil::ResourceKind Kind); - static ResourceInfo RawBuffer(Value *Symbol, StringRef Name, -ResourceBinding Binding, uint32_t UniqueID); + static ResourceInfo RawBuffer(Value *Symbol, StringRef Name); static ResourceInfo StructuredBuffer(Value *Symbol, StringRef Name, - ResourceBinding Binding, - uint32_t UniqueID, uint32_t Stride, - Align Alignment); + uint32_t Stride, Align Alignment); static ResourceInfo Texture2DMS(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, dxil::ElementType ElementTy, uint32_t ElementCount, uint32_t SampleCount); - static ResourceInfo - Texture2DMSArray(Value *Symbol, StringRef Name, ResourceBinding Binding, - uint32_t UniqueID, dxil::ElementType ElementTy, - uint32_t ElementCount, uint32_t SampleCount); + static ResourceInfo Texture2DMSArray(Value *Symbol, StringRef Name, + dxil::ElementType ElementTy, + uint32_t ElementCount, + uint32_t SampleCount); static ResourceInfo UAV(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, dxil::ElementType ElementTy, uint32_t ElementCount, bool GloballyCoherent, bool IsROV, dxil::ResourceKind Kind); static ResourceInfo RWRawBuffer(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, bool GloballyCoherent, bool IsROV); static ResourceInfo RWStructuredBuffer(Value *Symbol, StringRef Name, - ResourceBinding Binding, - uint32
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
llvmbot wrote: @llvm/pr-subscribers-backend-directx Author: Justin Bogner (bogner) Changes This makes the binding structure in a DXILResource default to empty and need a separate call to set up, and also moves the unique ID into it since bindings are the only place where those are actually used. This will put us in a better position when dealing with resource handles in libraries. --- Patch is 27.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100623.diff 3 Files Affected: - (modified) llvm/include/llvm/Analysis/DXILResource.h (+41-47) - (modified) llvm/lib/Analysis/DXILResource.cpp (+35-57) - (modified) llvm/unittests/Analysis/DXILResourceTest.cpp (+39-39) ``diff diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h index cca5e0f0bd759..d4006ae10837c 100644 --- a/llvm/include/llvm/Analysis/DXILResource.h +++ b/llvm/include/llvm/Analysis/DXILResource.h @@ -17,19 +17,22 @@ class MDTuple; namespace dxil { -struct ResourceBinding { - uint32_t Space; - uint32_t LowerBound; - uint32_t Size; - - bool operator==(const ResourceBinding &RHS) const { -return std::tie(Space, LowerBound, Size) == - std::tie(RHS.Space, RHS.LowerBound, RHS.Size); - } - bool operator!=(const ResourceBinding &RHS) const { return !(*this == RHS); } -}; - class ResourceInfo { + struct ResourceBinding { +uint32_t UniqueID; +uint32_t Space; +uint32_t LowerBound; +uint32_t Size; + +bool operator==(const ResourceBinding &RHS) const { + return std::tie(UniqueID, Space, LowerBound, Size) == + std::tie(RHS.UniqueID, RHS.Space, RHS.LowerBound, RHS.Size); +} +bool operator!=(const ResourceBinding &RHS) const { + return !(*this == RHS); +} + }; + struct UAVInfo { bool GloballyCoherent; bool HasCounter; @@ -81,12 +84,11 @@ class ResourceInfo { Value *Symbol; StringRef Name; - ResourceBinding Binding; - uint32_t UniqueID; - dxil::ResourceClass RC; dxil::ResourceKind Kind; + ResourceBinding Binding = {}; + // Resource 
class dependent properties. // CBuffer, Sampler, and RawBuffer end here. union { @@ -114,70 +116,62 @@ class ResourceInfo { bool isMultiSample() const; ResourceInfo(dxil::ResourceClass RC, dxil::ResourceKind Kind, Value *Symbol, - StringRef Name, ResourceBinding Binding, uint32_t UniqueID) - : Symbol(Symbol), Name(Name), Binding(Binding), UniqueID(UniqueID), -RC(RC), Kind(Kind) {} + StringRef Name) + : Symbol(Symbol), Name(Name), RC(RC), Kind(Kind) {} public: static ResourceInfo SRV(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, dxil::ElementType ElementTy, uint32_t ElementCount, dxil::ResourceKind Kind); - static ResourceInfo RawBuffer(Value *Symbol, StringRef Name, -ResourceBinding Binding, uint32_t UniqueID); + static ResourceInfo RawBuffer(Value *Symbol, StringRef Name); static ResourceInfo StructuredBuffer(Value *Symbol, StringRef Name, - ResourceBinding Binding, - uint32_t UniqueID, uint32_t Stride, - Align Alignment); + uint32_t Stride, Align Alignment); static ResourceInfo Texture2DMS(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, dxil::ElementType ElementTy, uint32_t ElementCount, uint32_t SampleCount); - static ResourceInfo - Texture2DMSArray(Value *Symbol, StringRef Name, ResourceBinding Binding, - uint32_t UniqueID, dxil::ElementType ElementTy, - uint32_t ElementCount, uint32_t SampleCount); + static ResourceInfo Texture2DMSArray(Value *Symbol, StringRef Name, + dxil::ElementType ElementTy, + uint32_t ElementCount, + uint32_t SampleCount); static ResourceInfo UAV(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, dxil::ElementType ElementTy, uint32_t ElementCount, bool GloballyCoherent, bool IsROV, dxil::ResourceKind Kind); static ResourceInfo RWRawBuffer(Value *Symbol, StringRef Name, - ResourceBinding Binding, uint32_t UniqueID, bool GloballyCoherent, bool IsROV); static ResourceInfo RWStructuredBuffer(Value *Symbol, StringRef Name, - ResourceBinding Binding, - uint
[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)
https://github.com/python3kgae approved this pull request. https://github.com/llvm/llvm-project/pull/100622 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)
https://github.com/bob80905 approved this pull request. https://github.com/llvm/llvm-project/pull/100622 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/100622 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Replace #include with forward declaration. NFC (PR #100622)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/100622 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/100623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
https://github.com/bogner updated https://github.com/llvm/llvm-project/pull/100623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
https://github.com/bob80905 approved this pull request. LGTM, makes sense. Weird how github highlights `UniqueID`, maybe it's some sort of special keyword? It might be worth renaming, but very small nit. https://github.com/llvm/llvm-project/pull/100623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [DXIL][Analysis] Make the DXILResource binding optional. NFC (PR #100623)
https://github.com/damyanp approved this pull request. https://github.com/llvm/llvm-project/pull/100623 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/100634 Backport cc4f98979b079b517edd8a71f56a8975f436e63d Requested by: @asl >From 018a8c72cc75b9bb4dcb88a07bdda31454c78ca1 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Thu, 25 Jul 2024 11:57:46 -0700 Subject: [PATCH] Normalize ptrauth handling in sanitizer runtime (#100483) 1. Include `ptrauth.h` if `ptrauth_intrinsics` language feature is specified (per ptrauth spec, this is what enables `ptrauh.h` usage and functions like `ptrauth_strip`) 2. For PAC-RET fallback implement two changes: 1. Switch to macro, so we can ignore key argument 2. Ensure the unsigned value is erased from LR, so the possibility of gadget reuse is reduced. Fixes #100467 (cherry picked from commit cc4f98979b079b517edd8a71f56a8975f436e63d) --- .../lib/sanitizer_common/sanitizer_ptrauth.h | 46 ++- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h index 5200354694851..b5215c0d49c06 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h @@ -9,31 +9,33 @@ #ifndef SANITIZER_PTRAUTH_H #define SANITIZER_PTRAUTH_H -#if __has_feature(ptrauth_calls) -#include +#if __has_feature(ptrauth_intrinsics) +# include #elif defined(__ARM_FEATURE_PAC_DEFAULT) && !defined(__APPLE__) -inline unsigned long ptrauth_strip(void* __value, unsigned int __key) { - // On the stack the link register is protected with Pointer - // Authentication Code when compiled with -mbranch-protection. - // Let's stripping the PAC unconditionally because xpaclri is in - // the NOP space so will do nothing when it is not enabled or not available. 
- unsigned long ret; - asm volatile( - "mov x30, %1\n\t" - "hint #7\n\t" // xpaclri - "mov %0, x30\n\t" - : "=r"(ret) - : "r"(__value) - : "x30"); - return ret; -} -#define ptrauth_auth_data(__value, __old_key, __old_data) __value -#define ptrauth_string_discriminator(__string) ((int)0) +// On the stack the link register is protected with Pointer +// Authentication Code when compiled with -mbranch-protection. +// Let's stripping the PAC unconditionally because xpaclri is in +// the NOP space so will do nothing when it is not enabled or not available. +# define ptrauth_strip(__value, __key) \ +({ \ + unsigned long ret;\ + asm volatile( \ + "mov x30, %1\n\t" \ + "hint #7\n\t" \ + "mov %0, x30\n\t" \ + "mov x30, xzr\n\t"\ + : "=r"(ret) \ + : "r"(__value)\ + : "x30"); \ + ret; \ +}) +# define ptrauth_auth_data(__value, __old_key, __old_data) __value +# define ptrauth_string_discriminator(__string) ((int)0) #else // Copied from -#define ptrauth_strip(__value, __key) __value -#define ptrauth_auth_data(__value, __old_key, __old_data) __value -#define ptrauth_string_discriminator(__string) ((int)0) +# define ptrauth_strip(__value, __key) __value +# define ptrauth_auth_data(__value, __old_key, __old_data) __value +# define ptrauth_string_discriminator(__string) ((int)0) #endif #define STRIP_PAC_PC(pc) ((uptr)ptrauth_strip(pc, 0)) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/100634 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)
llvmbot wrote: @DanielKristofKiss What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/100634 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/19.x: Normalize ptrauth handling in sanitizer runtime (#100483) (PR #100634)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (llvmbot) Changes Backport cc4f98979b079b517edd8a71f56a8975f436e63d Requested by: @asl --- Full diff: https://github.com/llvm/llvm-project/pull/100634.diff 1 Files Affected: - (modified) compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h (+24-22) ``diff diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h index 5200354694851..b5215c0d49c06 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h @@ -9,31 +9,33 @@ #ifndef SANITIZER_PTRAUTH_H #define SANITIZER_PTRAUTH_H -#if __has_feature(ptrauth_calls) -#include +#if __has_feature(ptrauth_intrinsics) +# include #elif defined(__ARM_FEATURE_PAC_DEFAULT) && !defined(__APPLE__) -inline unsigned long ptrauth_strip(void* __value, unsigned int __key) { - // On the stack the link register is protected with Pointer - // Authentication Code when compiled with -mbranch-protection. - // Let's stripping the PAC unconditionally because xpaclri is in - // the NOP space so will do nothing when it is not enabled or not available. - unsigned long ret; - asm volatile( - "mov x30, %1\n\t" - "hint #7\n\t" // xpaclri - "mov %0, x30\n\t" - : "=r"(ret) - : "r"(__value) - : "x30"); - return ret; -} -#define ptrauth_auth_data(__value, __old_key, __old_data) __value -#define ptrauth_string_discriminator(__string) ((int)0) +// On the stack the link register is protected with Pointer +// Authentication Code when compiled with -mbranch-protection. +// Let's stripping the PAC unconditionally because xpaclri is in +// the NOP space so will do nothing when it is not enabled or not available. 
+# define ptrauth_strip(__value, __key) \ +({ \ + unsigned long ret;\ + asm volatile( \ + "mov x30, %1\n\t" \ + "hint #7\n\t" \ + "mov %0, x30\n\t" \ + "mov x30, xzr\n\t"\ + : "=r"(ret) \ + : "r"(__value)\ + : "x30"); \ + ret; \ +}) +# define ptrauth_auth_data(__value, __old_key, __old_data) __value +# define ptrauth_string_discriminator(__string) ((int)0) #else // Copied from -#define ptrauth_strip(__value, __key) __value -#define ptrauth_auth_data(__value, __old_key, __old_data) __value -#define ptrauth_string_discriminator(__string) ((int)0) +# define ptrauth_strip(__value, __key) __value +# define ptrauth_auth_data(__value, __old_key, __old_data) __value +# define ptrauth_string_discriminator(__string) ((int)0) #endif #define STRIP_PAC_PC(pc) ((uptr)ptrauth_strip(pc, 0)) `` https://github.com/llvm/llvm-project/pull/100634 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit (PR #99283)
https://github.com/yuxuanchen1997 updated https://github.com/llvm/llvm-project/pull/99283 >From d42ce99fac00de6d35e423490f2603796a10 Mon Sep 17 00:00:00 2001 From: Yuxuan Chen Date: Mon, 15 Jul 2024 15:01:39 -0700 Subject: [PATCH] Implement noalloc in CoroSplit --- llvm/lib/Transforms/Coroutines/CoroInternal.h | 4 + llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 123 ++ llvm/lib/Transforms/Coroutines/Coroutines.cpp | 27 llvm/test/Transforms/Coroutines/ArgAddr.ll| 2 +- .../Transforms/Coroutines/coro-alloca-07.ll | 2 +- .../coro-alloca-loop-carried-address.ll | 2 +- .../Coroutines/coro-lifetime-end.ll | 6 +- .../Coroutines/coro-spill-after-phi.ll| 2 +- .../Transforms/Coroutines/coro-split-00.ll| 7 + 9 files changed, 142 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 5716fd0ea4ab9..d91cccd99a703 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -26,6 +26,10 @@ bool declaresIntrinsics(const Module &M, const std::initializer_list); void replaceCoroFree(CoroIdInst *CoroId, bool Elide); +void suppressCoroAllocs(CoroIdInst *CoroId); +void suppressCoroAllocs(LLVMContext &Context, +ArrayRef CoroAllocs); + /// Attempts to rewrite the location operand of debug intrinsics in terms of /// the coroutine frame pointer, folding pointer offsets into the DIExpression /// of the intrinsic. 
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 9e4da5f8ca961..9c0db4f29056e 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CFG.h" @@ -1179,6 +1180,14 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } +static TypeSize getFrameSizeForShape(coro::Shape &Shape) { + // In the same function all coro.sizes should have the same result type. + auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + return DL.getTypeAllocSize(Shape.FrameTy); +} + static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); @@ -1194,10 +1203,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { // In the same function all coro.sizes should have the same result type. 
auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + auto *SizeConstant = + ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape)); for (CoroSizeInst *CS : Shape.CoroSizes) { CS->replaceAllUsesWith(SizeConstant); @@ -1455,6 +1462,64 @@ struct SwitchCoroutineSplitter { setCoroInfo(F, Shape, Clones); } + static Function *createNoAllocVariant(Function &F, coro::Shape &Shape, +SmallVectorImpl &Clones) { +auto *OrigFnTy = F.getFunctionType(); +auto OldParams = OrigFnTy->params(); + +SmallVector NewParams; +NewParams.reserve(OldParams.size() + 1); +for (Type *T : OldParams) { + NewParams.push_back(T); +} +NewParams.push_back(PointerType::getUnqual(Shape.FrameTy)); + +auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams, + OrigFnTy->isVarArg()); +Function *NoAllocF = +Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc"); +ValueToValueMapTy VMap; +unsigned int Idx = 0; +for (const auto &I : F.args()) { + VMap[&I] = NoAllocF->getArg(Idx++); +} +SmallVector Returns; +CloneFunctionInto(NoAllocF, &F, VMap, + CloneFunctionChangeType::LocalChangesOnly, Returns); + +if (Shape.CoroBegin) { + auto *NewCoroBegin = + cast_if_present(VMap[Shape.CoroBegin]); + auto *NewCoroId = cast(NewCoroBegin->getId()); + coro::replaceCoroFree(NewCoroId, /*Elide=*/true); + coro::suppressCoroAllocs(NewCoroId); + NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(Idx)); + NewCoroBegin->eraseFromParent(); +} + +Module *M = F.getParent(); +M->getFunctionList().insert(M->end(), NoAllocF); + +removeUnreachableBlocks(*NoAllocF); +auto NewAttrs = NoAllocF->getAttributes(); +// We just
[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for cost of abs intrinsics (PR #100522)
arsenm wrote: ### Merge activity * **Jul 25, 4:25 PM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100522). https://github.com/llvm/llvm-project/pull/100522 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mul overflow ISD nodes (PR #100519)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100519 >From 3d683da35b98db6dd0b5a94692b735765a6f776f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:27:54 +0400 Subject: [PATCH] TTI: Check legalization cost of mul overflow ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 67 +- .../Analysis/CostModel/X86/arith-overflow.ll | 120 +- 2 files changed, 96 insertions(+), 91 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index a89d4fe467eb9..314390aee5085 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2192,37 +2192,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBO; break; case Intrinsic::smul_with_overflow: -case Intrinsic::umul_with_overflow: { - Type *MulTy = RetTy->getContainedType(0); - Type *OverflowTy = RetTy->getContainedType(1); - unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; - Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); - bool IsSigned = IID == Intrinsic::smul_with_overflow; - - unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - - if (IsSigned) -Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, -CostKind, -{TTI::OK_AnyValue, TTI::OP_None}, -{TTI::OK_UniformConstantValue, TTI::OP_None}); - - Cost += thisT()->getCmpSelInstrCost( - BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); - return Cost; -} + ISD = ISD::SMULO; + break; +case Intrinsic::umul_with_overflow: + ISD = ISD::UMULO; + break; case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) @@ -2367,6 +2341,37 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { OverflowTy, Pred, CostKind); return Cost; } +case Intrinsic::smul_with_overflow: +case Intrinsic::umul_with_overflow: { + Type *MulTy = RetTy->getContainedType(0); + Type *OverflowTy = RetTy->getContainedType(1); + unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; + Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; + + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, ExtTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + + if (IsSigned) +Cost += thisT()->getArithmeticInstrCost( +Instruction::AShr, MulTy, CostKind, +{TTI::OK_AnyValue, TTI::OP_None}, +{TTI::OK_UniformConstantValue, TTI::OP_None}); + + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); + return Cost; +} case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll index c5da46af04367..28d53042d4c21 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -1002,9 +1002,9 @@ define i32 @smul(i32 %arg) { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 148 f
[llvm-branch-commits] [llvm] TTI: Check legalization cost of mulfix ISD nodes (PR #100520)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100520 >From 1d17da3e7cd5253d0c7a9bb8acc5989d1e5ba615 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:31:04 +0400 Subject: [PATCH] TTI: Check legalization cost of mulfix ISD nodes --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 53 +--- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 314390aee5085..1a089a3fa9634 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2155,30 +2155,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ISD = ISD::USUBSAT; break; case Intrinsic::smul_fix: -case Intrinsic::umul_fix: { - unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; - Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); - - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; - TTI::CastContextHint CCH = TTI::CastContextHint::None; - - InstructionCost Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); - Cost += - thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); - Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, -CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, - CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, - {TTI::OK_AnyValue, TTI::OP_None}, - {TTI::OK_UniformConstantValue, TTI::OP_None}); - Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); - return Cost; -} + ISD = ISD::SMULFIX; + break; +case Intrinsic::umul_fix: + ISD = ISD::UMULFIX; + break; case Intrinsic::sadd_with_overflow: ISD = ISD::SADDO; break; @@ -2413,6 +2394,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { 
CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } +case Intrinsic::smul_fix: +case Intrinsic::umul_fix: { + unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; + Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); + + unsigned ExtOp = + IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; + + InstructionCost Cost = 0; + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); + Cost += + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, +CCH, CostKind); + Cost += thisT()->getArithmeticInstrCost( + Instruction::LShr, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost( + Instruction::Shl, RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); + return Cost; +} default: break; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From 49db2b2b9855d18df6449b6dedf7e50ccc1d6265 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- 3 files changed, 210 insertions(+), 199 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ba70498bfb731..65f929369c1f0 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2116,20 +2116,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2398,6 +2387,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 0b1ecc002ae25..8ae236850b982 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -693,6 +693,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -721,7 +722,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -737,10 +738,16 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; })) NElts = 1; break; + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; } return LT.first * NElts * InstRate; diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0..b86e99558377b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) ; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instructi
[llvm-branch-commits] [MC][NFC] Store MCPseudoProbeFuncDesc::FuncName as StringRef (PR #100655)
https://github.com/aaupov created https://github.com/llvm/llvm-project/pull/100655 Reduces peak RSS in `perf2bolt --profile-use-pseudo-probes` to 16.04GiB. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match blocks with pseudo probes (PR #99891)
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/99891 >From 0274f697376264c2d77816190f9a434f64e79089 Mon Sep 17 00:00:00 2001 From: shawbyoung Date: Mon, 22 Jul 2024 11:56:23 -0700 Subject: [PATCH 01/24] Changed assignment of profiles with pseudo probe index Created using spr 1.3.4 --- bolt/lib/Profile/StaleProfileMatching.cpp | 85 +++ .../X86/match-blocks-with-pseudo-probes.test | 25 ++ 2 files changed, 78 insertions(+), 32 deletions(-) diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp index 4105f626fb5b6..c135ee5ff4837 100644 --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -195,11 +195,15 @@ class StaleMatcher { void init(const std::vector &Blocks, const std::vector &Hashes, const std::vector &CallHashes, -std::optional YamlBFGUID) { +const std::unordered_map> +IndexToBinaryPseudoProbes, +const std::unordered_map +BinaryPseudoProbeToBlock, +const uint64_t YamlBFGUID) { assert(Blocks.size() == Hashes.size() && Hashes.size() == CallHashes.size() && "incorrect matcher initialization"); - for (size_t I = 0; I < Blocks.size(); I++) { FlowBlock *Block = Blocks[I]; uint16_t OpHash = Hashes[I].OpcodeHash; @@ -209,6 +213,8 @@ class StaleMatcher { std::make_pair(Hashes[I], Block)); this->Blocks.push_back(Block); } +this->IndexToBinaryPseudoProbes = IndexToBinaryPseudoProbes; +this->BinaryPseudoProbeToBlock = BinaryPseudoProbeToBlock; this->YamlBFGUID = YamlBFGUID; } @@ -234,10 +240,14 @@ class StaleMatcher { using HashBlockPairType = std::pair; std::unordered_map> OpHashToBlocks; std::unordered_map> CallHashToBlocks; - std::vector Blocks; + std::unordered_map> + IndexToBinaryPseudoProbes; + std::unordered_map + BinaryPseudoProbeToBlock; + std::vector Blocks; // If the pseudo probe checksums of the profiled and binary functions are // equal, then the YamlBF's GUID is defined and used to match blocks. 
- std::optional YamlBFGUID; + uint64_t YamlBFGUID; // Uses OpcodeHash to find the most similar block for a given hash. const FlowBlock *matchWithOpcodes(BlendedBlockHash BlendedHash) const { @@ -284,7 +294,7 @@ class StaleMatcher { // Searches for the pseudo probe attached to the matched function's block, // ignoring pseudo probes attached to function calls and inlined functions' // blocks. -outs() << "match with pseudo probes\n"; +std::vector BlockPseudoProbes; for (const auto &PseudoProbe : PseudoProbes) { // Ensures that pseudo probe information belongs to the appropriate // function and not an inlined function. @@ -293,11 +303,30 @@ class StaleMatcher { // Skips pseudo probes attached to function calls. if (PseudoProbe.Type != static_cast(PseudoProbeType::Block)) continue; - assert(PseudoProbe.Index < Blocks.size() && - "pseudo probe index out of range"); - return Blocks[PseudoProbe.Index]; + + BlockPseudoProbes.push_back(&PseudoProbe); } -return nullptr; + +// Returns nullptr if there is not a 1:1 mapping of the yaml block pseudo +// probe and binary pseudo probe. 
+if (BlockPseudoProbes.size() == 0 || BlockPseudoProbes.size() > 1) + return nullptr; + +uint64_t Index = BlockPseudoProbes[0]->Index; +assert(Index < Blocks.size() && "Invalid pseudo probe index"); + +auto It = IndexToBinaryPseudoProbes.find(Index); +assert(It != IndexToBinaryPseudoProbes.end() && + "All blocks should have a pseudo probe"); +if (It->second.size() > 1) + return nullptr; + +const MCDecodedPseudoProbe *BinaryPseudoProbe = It->second[0]; +auto BinaryPseudoProbeIt = BinaryPseudoProbeToBlock.find(BinaryPseudoProbe); +assert(BinaryPseudoProbeIt != BinaryPseudoProbeToBlock.end() && + "All binary pseudo probes should belong a binary basic block"); + +return BinaryPseudoProbeIt->second; } }; @@ -491,6 +520,11 @@ size_t matchWeightsByHashes( std::vector CallHashes; std::vector Blocks; std::vector BlendedHashes; + std::unordered_map> + IndexToBinaryPseudoProbes; + std::unordered_map + BinaryPseudoProbeToBlock; + const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder(); for (uint64_t I = 0; I < BlockOrder.size(); I++) { const BinaryBasicBlock *BB = BlockOrder[I]; assert(BB->getHash() != 0 && "empty hash of BinaryBasicBlock"); @@ -510,9 +544,27 @@ size_t matchWeightsByHashes( Blocks.push_back(&Func.Blocks[I + 1]); BlendedBlockHash BlendedHash(BB->getHash()); BlendedHashes.push_back(BlendedHash); +if (PseudoProbeDecoder) { + const AddressProbesMap &ProbeMap = + PseudoProbeDecoder->get