llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-selectiondag

Author: Krzysztof Drewniak (krzysz00)

<details>
<summary>Changes</summary>

Generalize the extract_subvector-of-extract_subvector fold to compose
nonzero indices instead of only handling an outer index of zero.

AI note: an LLM generated the code and the test; I have read both.

Co-Authored-By: OpenAI Codex <codex@openai.com>

---

<sub>Stack created with <a href="https://github.com/github/gh-stack">GitHub Stacks CLI</a> • <a href="https://gh.io/stacks-feedback">Give Feedback 💬</a></sub>

---
Full diff: https://github.com/llvm/llvm-project/pull/200935.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+8-5) 
- (added) llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll (+43) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0c9820fb64de9..dd74e63744f2e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27559,17 +27559,20 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
     return NarrowLoad;
 
   // Combine an extract of an extract into a single extract_subvector.
-  // ext (ext X, C), 0 --> ext X, C
-  if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
+  // ext (ext X, C1), C2 --> ext X, C1 + C2
+  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
     // Both indices must have the same scaling factor and C has to be a
     // multiple of the new result type's known minimum vector length.
+    uint64_t InnerExtIdx = V.getConstantOperandVal(1);
+    uint64_t NewExtIdx = InnerExtIdx + ExtIdx;
     if (V.getValueType().isScalableVector() == NVT.isScalableVector() &&
-        V.getConstantOperandVal(1) % NVT.getVectorMinNumElements() == 0 &&
+        NewExtIdx % NVT.getVectorMinNumElements() == 0 &&
         TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
-                                    V.getConstantOperandVal(1)) &&
+                                    NewExtIdx) &&
         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+      SDValue NewIndex = DAG.getVectorIdxConstant(NewExtIdx, DL);
       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
-                         V.getOperand(1));
+                         NewIndex);
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll
new file mode 100644
index 0000000000000..aa5b2f80542c7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-extract-extract.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \
+; RUN:   -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s \
+; RUN:   --check-prefix=COMBINE --implicit-check-not=REG_SEQUENCE
+; RUN: llc -O2 -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 \
+; RUN:   -verify-machineinstrs -combiner-disabled -stop-after=amdgpu-isel < %s \
+; RUN:   | FileCheck %s --check-prefix=NOCOMBINE
+
+declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64 immarg)
+declare <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32>, i64 immarg)
+
+define <2 x i32> @extract_of_extract_nonzero(<8 x i32> %x) nounwind {
+  ; COMBINE-LABEL: name: extract_of_extract_nonzero
+  ; COMBINE: bb.0 (%ir-block.0):
+  ; COMBINE-NEXT:   liveins: $vgpr6, $vgpr7
+  ; COMBINE-NEXT: {{  $}}
+  ; COMBINE-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+  ; COMBINE-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+  ; COMBINE-NEXT:   $vgpr0 = COPY [[COPY1]]
+  ; COMBINE-NEXT:   $vgpr1 = COPY [[COPY]]
+  ; COMBINE-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+  ;
+  ; NOCOMBINE-LABEL: name: extract_of_extract_nonzero
+  ; NOCOMBINE: bb.0 (%ir-block.0):
+  ; NOCOMBINE-NEXT:   liveins: $vgpr4, $vgpr5, $vgpr6, $vgpr7
+  ; NOCOMBINE-NEXT: {{  $}}
+  ; NOCOMBINE-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+  ; NOCOMBINE-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+  ; NOCOMBINE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+  ; NOCOMBINE-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
+  ; NOCOMBINE-NEXT:   [[COPY4:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub3
+  ; NOCOMBINE-NEXT:   [[COPY5:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE]].sub2
+  ; NOCOMBINE-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:av_64_align2 = REG_SEQUENCE killed [[COPY5]], %subreg.sub0, killed [[COPY4]], %subreg.sub1
+  ; NOCOMBINE-NEXT:   [[COPY6:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub0
+  ; NOCOMBINE-NEXT:   [[COPY7:%[0-9]+]]:av_32 = COPY [[REG_SEQUENCE1]].sub1
+  ; NOCOMBINE-NEXT:   $vgpr0 = COPY [[COPY6]]
+  ; NOCOMBINE-NEXT:   $vgpr1 = COPY [[COPY7]]
+  ; NOCOMBINE-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
+  %mid = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %x, i64 4)
+  %out = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %mid, i64 2)
+  ret <2 x i32> %out
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/200935
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to