https://github.com/leo-ard created 
https://github.com/llvm/llvm-project/pull/67594

This PR fixes #55013 : the max intrinsics is not generated for this simple loop 
case : https://godbolt.org/z/hxz1xhMPh. This is caused by a ICMP not being 
folded into a select, thus not generating the max intrinsics. 

Since LLVM 14, SCCP pass got smarter by folding sext into zext for positive 
ranges : https://reviews.llvm.org/D81756. After this change, InstCombine was 
sometimes unable to fold icmp correctly as both of the arguments pointed to 
mismatched zext/sext. To fix this, @rotateright implemented this fix : 
https://reviews.llvm.org/D124419 that tries to resolve the mismatch by knowing 
if the argument of a zext is positive (in which case, it is like a sext) by 
using ValueTracking. However, ValueTracking seems to be not smart enough for 
this case and cannot accurately know that the value is positive or not. This PR 
implements the folding in SCCP directly, where we have the knowledge that the 
value are positive or not.

This PR also contains test cases for sext/zext folding with SCCP as well as a 
x86 regression tests for the max/min case. 

From 399f9d64cfde0761ac8278dd05ba704d879b1f5a Mon Sep 17 00:00:00 2001
From: leo-ard <lool4...@gmail.com>
Date: Wed, 27 Sep 2023 13:35:53 -0400
Subject: [PATCH] Adding ICMP folding for SCCPSolver

---
 clang/test/CodeGen/X86/min_max.c              |  19 ++
 llvm/lib/Transforms/Utils/SCCPSolver.cpp      |  54 +++++
 .../Transforms/SCCP/icmp-fold-with-cast.ll    | 185 ++++++++++++++++++
 llvm/test/Transforms/SCCP/widening.ll         |   4 +-
 4 files changed, 260 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/min_max.c
 create mode 100644 llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll

diff --git a/clang/test/CodeGen/X86/min_max.c b/clang/test/CodeGen/X86/min_max.c
new file mode 100644
index 000000000000000..7af8181cc9ff367
--- /dev/null
+++ b/clang/test/CodeGen/X86/min_max.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -O2 -triple=x86_64-apple-darwin -emit-llvm -o - | 
FileCheck %s
+
+short vecreduce_smax_v2i16(int n, short* v)
+{
+  // CHECK: @llvm.smax
+  short p = 0;
+  for (int i = 0; i < n; ++i)
+    p = p < v[i] ? v[i] : p;
+  return p;
+}
+
+short vecreduce_smin_v2i16(int n, short* v)
+{
+  // CHECK: @llvm.smin
+  short p = 0;
+  for (int i = 0; i < n; ++i)
+    p = p > v[i] ? v[i] : p;
+  return p;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp 
b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 8a67fda7c98e787..09f64a925ab1b7c 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -193,6 +193,60 @@ static bool replaceSignedInst(SCCPSolver &Solver,
     NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
     break;
   }
+  case Instruction::ICmp: {
+    ICmpInst &ICmp = cast<ICmpInst>(Inst);
+  
+    ZExtInst *Op0_zext = dyn_cast<ZExtInst>(ICmp.getOperand(0));
+    SExtInst *Op0_sext = dyn_cast<SExtInst>(ICmp.getOperand(0));
+  
+    ZExtInst *Op1_zext = dyn_cast<ZExtInst>(ICmp.getOperand(1));
+    SExtInst *Op1_sext = dyn_cast<SExtInst>(ICmp.getOperand(1));
+  
+    CastInst *Op0;
+    CastInst *Op1;
+  
+    if (Op0_zext) Op0 = Op0_zext; else Op0 = Op0_sext;
+    if (Op1_zext) Op1 = Op1_zext; else Op1 = Op1_sext;
+  
+    bool reversed = false;
+  
+    if (!Op0 || !Op1){
+      // Op0 and Op1 must be defined
+      return false;
+    } 
+  
+    if (Op1_zext && (! Op0_zext)){
+      // We force Op0 to be a zext and reverse the arguments
+      //   at the end if we swap
+      reversed = true; 
+  
+      std::swap(Op0_zext, Op1_zext);
+      std::swap(Op0_sext, Op1_sext);
+      std::swap(Op0, Op1);
+    }
+  
+  
+    if(Op0->getType() != Op1->getType()){
+      // extensions are not of the same type
+      // This optimization is done in InstCombine
+      return false;
+    }
+  
+    // ICMP (sext X) (sext y) => ICMP X, Y
+    // ICMP (zext X) (zext y) => ICMP X, Y
+    // ICMP (zext X) (sext Y) => ICMP X, Y  if X >= 0 and ICMP signed
+    if((Op0_zext && Op1_zext)
+       || (Op0_sext && Op1_sext) 
+       || (ICmp.isSigned() && Op0_zext && Op1_sext && 
isNonNegative(Op0_zext->getOperand(0)))){
+  
+      Value *X = Op0->getOperand(0);
+      Value *Y = Op1->getOperand(0);
+      NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(), 
reversed ? Y : X, reversed ? X : Y, "", &Inst);
+      break;
+    }
+  
+    return false;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll 
b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
new file mode 100644
index 000000000000000..90b2c123081fb49
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --tool ./bin/opt --version 3
+; See PRXXX for more details
+; RUN: opt < %s -S -passes=ipsccp | FileCheck %s
+
+
+define signext i32 @sext_sext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @sext_sext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[Y]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, 
[[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %conv = sext i16 %x to i32
+  %conv1 = sext i16 %y to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, 
%cond.true
+  %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+  ret i32 %cond
+}
+
+
+define signext i32 @zext_zext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @zext_zext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[X]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i16 [[Y]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, 
[[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %conv = zext i16 %x to i32
+  %conv1 = zext i16 %y to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, 
%cond.true
+  %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+  ret i32 %cond
+}
+
+
+define signext i16 @zext_positive_and_sext(i32 noundef %n, ptr noundef %v) {
+; CHECK-LABEL: define signext i16 @zext_positive_and_sext(
+; CHECK-SAME: i32 noundef [[N:%.*]], ptr noundef [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[P_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[CONV8:%.*]], 
[[COND_END:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], 
[[COND_END]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label 
[[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[P_0]] to i32
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i16, ptr [[V]], i64 
[[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[P_0]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret i16 [[P_0]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV1]], [[COND_TRUE]] ], [ 
[[CONV]], [[COND_FALSE]] ]
+; CHECK-NEXT:    [[CONV8]] = trunc i32 [[COND]] to i16
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %p.0 = phi i16 [ 0, %entry ], [ %conv8, %cond.end ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %for.cond
+  %conv = zext i16 %p.0 to i32                    ;; %p.0 is always positive 
here
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr i16, ptr %v, i64 %idxprom
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv1 = sext i16 %0 to i32
+  %cmp2 = icmp slt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret i16 %p.0
+
+cond.end:                                         ; preds = %cond.false, 
%cond.true
+  %cond = phi i32 [ %conv1, %cond.true ], [ %conv, %cond.false ]
+  %conv8 = trunc i32 %cond to i16
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+}
+
+
+
+define signext i16 @sext_and_zext_positive(i16 %x) {
+; CHECK-LABEL: define signext i16 @sext_and_zext_positive(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[V:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], 
[[COND_END:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[X]], [[V]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[A:%.*]] = phi i16 [ 10, [[COND_TRUE]] ], [ 20, 
[[COND_FALSE]] ]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[A]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %v = phi i16 [ 0, %entry ], [ %inc, %cond.end ] ;; always positive
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %conv = zext i16 %v to i32                    ;; %p.0 is always positive here
+  %conv1 = sext i16 %x to i32                    ;; %p.0 is always positive 
here
+  %cmp2 = icmp slt i32 %conv1, %conv ;; positive/positive
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, 
%cond.true
+  %a = phi i16 [ 10, %cond.true ],  [ 20, %cond.false ]
+  %inc = add i16 %a, 1
+  br label %for.cond
+}
diff --git a/llvm/test/Transforms/SCCP/widening.ll 
b/llvm/test/Transforms/SCCP/widening.ll
index f482ed3a4e7f655..84adf77c621e1ff 100644
--- a/llvm/test/Transforms/SCCP/widening.ll
+++ b/llvm/test/Transforms/SCCP/widening.ll
@@ -653,7 +653,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
 ; SCCP-NEXT:    [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], 
i32 [[TMP51]])
 ; SCCP-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
 ; SCCP-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; SCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; SCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
 ; SCCP-NEXT:    br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
 ; SCCP:       bb56:
 ; SCCP-NEXT:    [[TMP57:%.*]] = add nsw i32 [[TMP40]], -1
@@ -734,7 +734,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
 ; IPSCCP-NEXT:    [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr 
[[ARG]], i32 [[TMP51]])
 ; IPSCCP-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
 ; IPSCCP-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; IPSCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; IPSCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
 ; IPSCCP-NEXT:    br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
 ; IPSCCP:       bb56:
 ; IPSCCP-NEXT:    br label [[BB60]]

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to