10.0.0: apply ispc recommended patches

Naveen Saini Fri, 27 Aug 2021 00:14:50 -0700

https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches


Signed-off-by: Naveen Saini <[email protected]>
---
 ...vm10-0008-ispc-10_0_9_0_fix_for_1767.patch |  96 ++++++++++
 .../llvm10-0009-ispc-10_0_fix_for_1788.patch  | 105 +++++++++++
 .../llvm10-0010-ispc-10_0_fix_for_1793.patch  |  43 +++++
 .../llvm10-0011-ispc-10_0_fix_for_1844.patch  |  34 ++++
 ...2-ispc-10_0_i8_shuffle_avx512_i8_i16.patch |  40 ++++
 ...13-ispc-10_0_k_reg_mov_avx512_i8_i16.patch |  61 ++++++
 ...spc-10_0_packed_load_store_avx512skx.patch |  97 ++++++++++
 ...-ispc-10_0_vXi1calling_avx512_i8_i16.patch | 173 ++++++++++++++++++
 .../clang/llvm-project-source.bbappend        |   8 +
 9 files changed, 657 insertions(+)
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
 create mode 100644 
dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch

diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
new file mode 100644
index 00000000..7d06a884
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
@@ -0,0 +1,96 @@
+From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 11:53:27 +0800
+Subject: [PATCH 1/2] This patch is required to fix the crash referenced in
+ #1767
+
+It is a port of the following llvm 11.0 commit: 
https://reviews.llvm.org/D76994.
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches]
+
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ .../CodeGen/SelectionDAG/LegalizeTypes.cpp    |  3 +-
+ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
+ 2 files changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+index 63ddb59fce68..822da2183269 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
++++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
+       }
+     }
+   }
+-
++#ifndef NDEBUG
+   // Checked that NewNodes are only used by other NewNodes.
+   for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+     SDNode *N = NewNodes[i];
+@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
+          UI != UE; ++UI)
+       assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+   }
++#endif
+ }
+ 
+ /// This is the main entry point for the type legalizer. This does a top-down
+diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+index faae14444d51..b908c5c58e9f 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
++++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+@@ -155,7 +155,9 @@ private:
+   const SDValue &getSDValue(TableId &Id) {
+     RemapId(Id);
+     assert(Id && "TableId should be non-zero");
+-    return IdToValueMap[Id];
++    auto I = IdToValueMap.find(Id);
++    assert(I != IdToValueMap.end() && "cannot find Id in map");
++    return I->second;
+   }
+ 
+ public:
+@@ -172,24 +174,29 @@ public:
+   bool run();
+ 
+   void NoteDeletion(SDNode *Old, SDNode *New) {
++    assert(Old != New && "node replaced with self");
+     for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+       TableId NewId = getTableId(SDValue(New, i));
+       TableId OldId = getTableId(SDValue(Old, i));
+ 
+-      if (OldId != NewId)
++      if (OldId != NewId) {
+         ReplacedValues[OldId] = NewId;
+ 
+-      // Delete Node from tables.
++        // Delete Node from tables.  We cannot do this when OldId == NewId,
++        // because NewId can still have table references to it in
++        // ReplacedValues.
++        IdToValueMap.erase(OldId);
++        PromotedIntegers.erase(OldId);
++        ExpandedIntegers.erase(OldId);
++        SoftenedFloats.erase(OldId);
++        PromotedFloats.erase(OldId);
++        ExpandedFloats.erase(OldId);
++        ScalarizedVectors.erase(OldId);
++        SplitVectors.erase(OldId);
++        WidenedVectors.erase(OldId);
++      }
++
+       ValueToIdMap.erase(SDValue(Old, i));
+-      IdToValueMap.erase(OldId);
+-      PromotedIntegers.erase(OldId);
+-      ExpandedIntegers.erase(OldId);
+-      SoftenedFloats.erase(OldId);
+-      PromotedFloats.erase(OldId);
+-      ExpandedFloats.erase(OldId);
+-      ScalarizedVectors.erase(OldId);
+-      SplitVectors.erase(OldId);
+-      WidenedVectors.erase(OldId);
+     }
+   }
+ 
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
new file mode 100644
index 00000000..30395101
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
@@ -0,0 +1,105 @@
+From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 11:56:01 +0800
+Subject: [PATCH 2/2] This patch is a fix for #1788.
+
+It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
+This also needed part of another llvm 11.0 commit: 
https://reviews.llvm.org/D72975
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/tree/v1.16.1/llvm_patches]
+
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/include/llvm/IR/PatternMatch.h           | 22 ++++++++++++---
+ .../InstCombine/InstructionCombining.cpp      | 27 +++++++++++++++++--
+ 2 files changed, 44 insertions(+), 5 deletions(-)
+
+diff --git a/llvm/include/llvm/IR/PatternMatch.h 
b/llvm/include/llvm/IR/PatternMatch.h
+index 6621fc9f819c..fb7ad93519f6 100644
+--- a/llvm/include/llvm/IR/PatternMatch.h
++++ b/llvm/include/llvm/IR/PatternMatch.h
+@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy 
&L, const RTy &R) {
+ 
+ struct apint_match {
+   const APInt *&Res;
++  bool AllowUndef;
+ 
+-  apint_match(const APInt *&R) : Res(R) {}
++  apint_match(const APInt *&Res, bool AllowUndef)
++    : Res(Res), AllowUndef(AllowUndef) {}
+ 
+   template <typename ITy> bool match(ITy *V) {
+     if (auto *CI = dyn_cast<ConstantInt>(V)) {
+@@ -162,7 +164,8 @@ struct apint_match {
+     }
+     if (V->getType()->isVectorTy())
+       if (const auto *C = dyn_cast<Constant>(V))
+-        if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
++       if (auto *CI = dyn_cast_or_null<ConstantInt>(
++                C->getSplatValue(AllowUndef))) {
+           Res = &CI->getValue();
+           return true;
+         }
+@@ -192,7 +195,20 @@ struct apfloat_match {
+ 
+ /// Match a ConstantInt or splatted ConstantVector, binding the
+ /// specified pointer to the contained APInt.
+-inline apint_match m_APInt(const APInt *&Res) { return Res; }
++inline apint_match m_APInt(const APInt *&Res) {
++  // Forbid undefs by default to maintain previous behavior.
++  return apint_match(Res, /* AllowUndef */ false);
++}
++
++/// Match APInt while allowing undefs in splat vector constants.
++inline apint_match m_APIntAllowUndef(const APInt *&Res) {
++  return apint_match(Res, /* AllowUndef */ true);
++}
++
++/// Match APInt while forbidding undefs in splat vector constants.
++inline apint_match m_APIntForbidUndef(const APInt *&Res) {
++  return apint_match(Res, /* AllowUndef */ false);
++}
+ 
+ /// Match a ConstantFP or splatted ConstantVector, binding the
+ /// specified pointer to the contained APFloat.
+diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+index bf32996d96e2..40a246b9d7a7 100644
+--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
++++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction 
&Op, SelectInst *SI) {
+   if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+     if (CI->hasOneUse()) {
+       Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+-      if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+-          (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
++
++      // FIXME: This is a hack to avoid infinite looping with min/max 
patterns.
++      //        We have to ensure that vector constants that only differ with
++      //        undef elements are treated as equivalent.
++      auto areLooselyEqual = [](Value *A, Value *B) {
++        if (A == B)
++          return true;
++
++        // Test for vector constants.
++        Constant *ConstA, *ConstB;
++        if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
++          return false;
++
++        // TODO: Deal with FP constants?
++        if (!A->getType()->isIntOrIntVectorTy() || A->getType() != 
B->getType())
++          return false;
++
++        // Compare for equality including undefs as equal.
++        auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, 
ConstB);
++        const APInt *C;
++        return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
++      };
++
++      if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
++          (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
+         return nullptr;
+     }
+   }
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
new file mode 100644
index 00000000..027477f5
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
@@ -0,0 +1,43 @@
+From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 12:00:23 +0800
+Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
+
+It's a backport of the following llvm 11.0 commit: 
120c5f1057dc50229f73bc75bbabf4df6ee50fef
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/10_0_fix_for_1793.patch]
+
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+index 2476fd26f250..2743acc89bca 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+@@ -10702,8 +10702,9 @@ SDValue 
DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+   SDValue N0 = N->getOperand(0);
+   EVT VT = N->getValueType(0);
+ 
++  // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
+   if (N0.isUndef())
+-    return DAG.getUNDEF(VT);
++    return DAG.getConstant(0, SDLoc(N), VT);
+ 
+   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+     return Res;
+@@ -10718,8 +10719,9 @@ SDValue 
DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+   SDValue N0 = N->getOperand(0);
+   EVT VT = N->getValueType(0);
+ 
++  // zext_vector_inreg(undef) = 0 because the top bits will be zero.
+   if (N0.isUndef())
+-    return DAG.getUNDEF(VT);
++    return DAG.getConstant(0, SDLoc(N), VT);
+ 
+   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
+     return Res;
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
new file mode 100644
index 00000000..0e47af80
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
@@ -0,0 +1,34 @@
+From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 12:02:37 +0800
+Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
+ avx512skx-i16x32.
+
+It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
+https://reviews.llvm.org/D76312
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/10_0_fix_for_1844.patch]
+
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+index 2743acc89bca..439a8367dabe 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ 
+   // Attempt to pre-truncate BUILD_VECTOR sources.
+   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+-      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
++      TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
++      // Avoid creating illegal types if running after type legalizer.
++      (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
+     SDLoc DL(N);
+     EVT SVT = VT.getScalarType();
+     SmallVector<SDValue, 8> TruncOps;
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
new file mode 100644
index 00000000..89c0a8d2
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
@@ -0,0 +1,40 @@
+From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 12:07:25 +0800
+Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
+ createVariablePermute call fails
+
+Account for the case where a recursive createVariablePermute call with a wider 
vector type fails.
+
+Original test case from @craig.topper (Craig Topper)
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/10_0_i8_shuffle_avx512_i8_i16.patch]
+
+Signed-off-by: Simon Pilgrim <[email protected]>
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index c8720d9ae3a6..63eb050e9b3a 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue 
SrcVec, SDValue IndicesVec,
+       IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
+       IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
+                                   Subtarget, DAG, SDLoc(IndicesVec));
+-      return extractSubVector(
+-          createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 
0,
+-          DAG, DL, SizeInBits);
++      SDValue NewSrcVec =
++          createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
++      if (NewSrcVec)
++        return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
++      return SDValue();
+     } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
+       // Widen smaller SrcVec to match VT.
+       SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, 
SDLoc(SrcVec));
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
new file mode 100644
index 00000000..4fb41e26
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
@@ -0,0 +1,61 @@
+From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 12:09:42 +0800
+Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
+ avx512skx-i16x32 targets.
+
+This is a combination of two commits:
+- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
+- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/10_0_k_reg_mov_avx512_i8_i16.patch]
+
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+index 439a8367dabe..b1639c7f275d 100644
+--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode 
*Extract, SelectionDAG &DAG) {
+ 
+   // Allow targets to opt-out.
+   EVT VT = Extract->getValueType(0);
++
++  // We can only create byte sized loads.
++  if (!VT.isByteSized())
++    return SDValue();
++
++  unsigned Index = ExtIdx->getZExtValue();
++  unsigned NumElts = VT.getVectorNumElements();
++
++  // If the index is a multiple of the extract element count, we can offset 
the
++  // address by the store size multiplied by the subvector index. Otherwise if
++  // the scalar type is byte sized, we can just use the index multiplied by
++  // the element size in bytes as the offset.
++  unsigned Offset;
++  if (Index % NumElts == 0)
++    Offset = (Index / NumElts) * VT.getStoreSize();
++  else if (VT.getScalarType().isByteSized())
++    Offset = Index * VT.getScalarType().getStoreSize();
++  else
++    return SDValue();
++
+   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
+     return SDValue();
+@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode 
*Extract, SelectionDAG &DAG) {
+   // The narrow load will be offset from the base address of the old load if
+   // we are extracting from something besides index 0 (little-endian).
+   SDLoc DL(Extract);
+-  SDValue BaseAddr = Ld->getOperand(1);
+-  unsigned Offset = ExtIdx->getZExtValue() * 
VT.getScalarType().getStoreSize();
++  SDValue BaseAddr = Ld->getBasePtr();
+ 
+   // TODO: Use "BaseIndexOffset" to make this more effective.
+   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
new file mode 100644
index 00000000..259171b4
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
@@ -0,0 +1,97 @@
+From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 12:13:00 +0800
+Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
+ store zeros in the rest of the byte
+
+We can't store garbage in the unused bits. It's possible that something like 
zextload from i1/i2/i4 is created to read the memory. Those zextloads would be 
legalized assuming the extra bits are 0.
+
+I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 
case. It looks like the DAG combine in combineStore may have converted them to 
v8i1 first. And I think we're missing some cases to avoid going to the stack in 
the first place. But I don't have time to investigate those things at the 
moment so I wanted to focus on the correctness issue.
+
+Should fix PR48147.
+
+Reviewed By: RKSimon
+
+Differential Revision: https://reviews.llvm.org/D91294
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/10_0_packed_load_store_avx512skx.patch]
+
+Signed-off-by: Craig Topper <[email protected]>
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
+ llvm/lib/Target/X86/X86InstrAVX512.td   |  2 --
+ 2 files changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index 63eb050e9b3a..96b5e2cfbd82 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const 
X86Subtarget &Subtarget,
+   // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
+   if (StoredVal.getValueType().isVector() &&
+       StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+-    assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+-           "Unexpected VT");
++    unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
++    assert(NumElts <= 8 && "Unexpected VT");
+     assert(!St->isTruncatingStore() && "Expected non-truncating store");
+     assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+            "Expected AVX512F without AVX512DQI");
+ 
++    // We must pad with zeros to ensure we store zeroes to any unused bits.
+     StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+                             DAG.getUNDEF(MVT::v16i1), StoredVal,
+                             DAG.getIntPtrConstant(0, dl));
+     StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
+     StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
++    // Make sure we store zeros in the extra bits.
++    if (NumElts < 8)
++      StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
++                                         MVT::getIntegerVT(NumElts));
+ 
+     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                         St->getPointerInfo(), St->getAlignment(),
+@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG 
&DAG,
+ 
+     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), 
VT.getVectorNumElements());
+     StoredVal = DAG.getBitcast(NewVT, StoredVal);
+-
+-    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
++    SDValue Val = StoredVal.getOperand(0);
++    // We must store zeros to the unused bits.
++    Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
++    return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
+                         St->getPointerInfo(), St->getAlignment(),
+                         St->getMemOperand()->getFlags());
+   }
+@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG 
&DAG,
+   }
+ 
+   // Widen v2i1/v4i1 stores to v8i1.
+-  if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
++  if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+       Subtarget.hasAVX512()) {
+     unsigned NumConcats = 8 / VT.getVectorNumElements();
+-    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
++    // We must store zeros to the unused bits.
++    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
+     Ops[0] = StoredVal;
+     StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
+index 32f012033fb0..d3b92183f87b 100644
+--- a/llvm/lib/Target/X86/X86InstrAVX512.td
++++ b/llvm/lib/Target/X86/X86InstrAVX512.td
+@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
+ 
+ // Load/store kreg
+ let Predicates = [HasDQI] in {
+-  def : Pat<(store VK1:$src, addr:$dst),
+-            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
+ 
+   def : Pat<(v1i1 (load addr:$src)),
+             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
new file mode 100644
index 00000000..ccded994
--- /dev/null
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
@@ -0,0 +1,173 @@
+From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
+From: Naveen Saini <[email protected]>
+Date: Fri, 27 Aug 2021 12:15:09 +0800
+Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
+ getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
+ file
+
+    Previously we tried to promote these to xmm/ymm/zmm by promoting
+    in the X86CallingConv.td file. But this breaks when we run out
+    of xmm/ymm/zmm registers and need to fall back to memory. We end
+    up trying to create a nonsensical scalar to vector. This led
+    to an assertion. The new tests in avx512-calling-conv.ll all
+    trigger this assertion.
+
+    Since we really want to treat these types like we do on avx2,
+    it seems better to promote them before the calling convention
+    code gets involved. Except when the calling convention is one
+    that passes the vXi1 type in a k register.
+
+    The changes in avx512-regcall-Mask.ll are because we indicated
+    that xmm/ymm/zmm types should be passed indirectly for the
+    Win64 ABI before we go to the common lines that promoted the
+    vXi1 types. This caused the promoted types to be picked up by
+    the default calling convention code. Now we promote them earlier
+    so they get passed indirectly as though they were xmm/ymm/zmm.
+
+    Differential Revision: https://reviews.llvm.org/D75154
+
+Upstream-Status: Backport [Taken from 
ispc,https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/10_0_vXi1calling_avx512_i8_i16.patch]
+
+Signed-off-by: Craig Topper <[email protected]>
+Signed-off-by: Naveen Saini <[email protected]>
+---
+ llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
+ 1 file changed, 61 insertions(+), 29 deletions(-)
+
+diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
+index 96b5e2cfbd82..d5de94aeb8a2 100644
+--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
++++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
+@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) 
const {
+   return TargetLoweringBase::getPreferredVectorAction(VT);
+ }
+ 
++static std::pair<MVT, unsigned>
++handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
++                                 const X86Subtarget &Subtarget) {
++  // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
++  // convention is one that uses k registers.
++  if (NumElts == 2)
++    return {MVT::v2i64, 1};
++  if (NumElts == 4)
++    return {MVT::v4i32, 1};
++  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
++      CC != CallingConv::Intel_OCL_BI)
++    return {MVT::v8i16, 1};
++  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
++      CC != CallingConv::Intel_OCL_BI)
++    return {MVT::v16i8, 1};
++  // v32i1 passes in ymm unless we have BWI and the calling convention is
++  // regcall.
++  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != 
CallingConv::X86_RegCall))
++    return {MVT::v32i8, 1};
++  // Split v64i1 vectors if we don't have v64i8 available.
++  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
++    if (Subtarget.useAVX512Regs())
++      return {MVT::v64i8, 1};
++    return {MVT::v32i8, 2};
++  }
++
++  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
++  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
++      NumElts > 64)
++    return {MVT::i8, NumElts};
++
++  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
++}
++
+ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                      CallingConv::ID CC,
+                                                      EVT VT) const {
+-  // v32i1 vectors should be promoted to v32i8 to match avx2.
+-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
+-    return MVT::v32i8;
+-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+-      Subtarget.hasAVX512() &&
+-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
+-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
+-    return MVT::i8;
+-  // Split v64i1 vectors if we don't have v64i8 available.
+-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+-      CC != CallingConv::X86_RegCall)
+-    return MVT::v32i1;
++      Subtarget.hasAVX512()) {
++    unsigned NumElts = VT.getVectorNumElements();
++
++    MVT RegisterVT;
++    unsigned NumRegisters;
++    std::tie(RegisterVT, NumRegisters) =
++        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
++    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
++      return RegisterVT;
++  }
++
+   // FIXME: Should we just make these types legal and custom split operations?
+   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
+       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
+     return MVT::v16i32;
++
+   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+ }
+ 
+ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext 
&Context,
+                                                           CallingConv::ID CC,
+                                                           EVT VT) const {
+-  // v32i1 vectors should be promoted to v32i8 to match avx2.
+-  if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
+-    return 1;
+-  // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
+   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+-      Subtarget.hasAVX512() &&
+-      (!isPowerOf2_32(VT.getVectorNumElements()) ||
+-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
+-    return VT.getVectorNumElements();
+-  // Split v64i1 vectors if we don't have v64i8 available.
+-  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+-      CC != CallingConv::X86_RegCall)
+-    return 2;
++      Subtarget.hasAVX512()) {
++    unsigned NumElts = VT.getVectorNumElements();
++
++    MVT RegisterVT;
++    unsigned NumRegisters;
++    std::tie(RegisterVT, NumRegisters) =
++        handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
++    if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
++      return NumRegisters;
++  }
++
+   // FIXME: Should we just make these types legal and custom split operations?
+   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
+       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
+     return 1;
++
+   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+ }
+ 
+@@ -2140,8 +2172,8 @@ unsigned 
X86TargetLowering::getVectorTypeBreakdownForCallingConv(
+   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
+       Subtarget.hasAVX512() &&
+       (!isPowerOf2_32(VT.getVectorNumElements()) ||
+-       (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
+-       (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
++       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
++       VT.getVectorNumElements() > 64)) {
+     RegisterVT = MVT::i8;
+     IntermediateVT = MVT::i1;
+     NumIntermediates = VT.getVectorNumElements();
+@@ -2151,7 +2183,7 @@ unsigned 
X86TargetLowering::getVectorTypeBreakdownForCallingConv(
+   // Split v64i1 vectors if we don't have v64i8 available.
+   if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+       CC != CallingConv::X86_RegCall) {
+-    RegisterVT = MVT::v32i1;
++    RegisterVT = MVT::v32i8;
+     IntermediateVT = MVT::v32i1;
+     NumIntermediates = 2;
+     return 2;
+-- 
+2.17.1
+
diff --git 
a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
 
b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
index b144411d..3f304215 100644
--- 
a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ 
b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
@@ -18,6 +18,14 @@ SRC_URI_LLVM10_PATCHES = " \
                    
file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
                    
file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
                    file://llvm10-0007-support-cl_ext_float_atomics.patch \
+                   file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
+                   file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
+                   file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
+                   file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
+                   file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch 
\
+                   file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
+                   
file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
+                   
file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
                    "
 
 SRC_URI_LLVM11_PATCHES = " \
-- 
2.27.0

-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#7227): 
https://lists.yoctoproject.org/g/meta-intel/message/7227
Mute This Topic: https://lists.yoctoproject.org/mt/85180879/21656
Group Owner: [email protected]
Unsubscribe: https://lists.yoctoproject.org/g/meta-intel/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

[meta-intel] [PATCH 3/3] llvm/10.0.0: apply ispc recommended patches

Reply via email to