Issue 60603
Summary [RISCV] Correctness issue with +zve32x and float vector instructions
Labels bug, backend:RISC-V
Assignees
Reporter dcaballe
    The following function produces the wrong output when compiled with `+zve32x`. It works with `+v` or `+zve32f`.

```
target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
target triple = "riscv32-unknown-unknown-eabi-elf"

%iree_hal_executable_dispatch_state_v0_t = type { i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr }
%iree_hal_executable_workgroup_state_v0_t = type { i32, i32, i16, i16, i32, ptr, i32 }

define i32 @_iota_dim0_dispatch_0_generic_2x3(ptr noalias nocapture nonnull readnone align 16 %0, ptr noalias nocapture nonnull readonly align 16 %1, ptr noalias nocapture nonnull readonly align 16 %2) local_unnamed_addr #0 {
  %.elt19 = getelementptr inbounds %iree_hal_executable_dispatch_state_v0_t, ptr %1, i32 0, i32 10
 %.unpack20 = load ptr, ptr %.elt19, align 4
  %4 = load ptr, ptr %.unpack20, align 8
  %splitgep = getelementptr i8, ptr %4, i32 64
 %.elt23 = getelementptr inbounds %iree_hal_executable_workgroup_state_v0_t, ptr %2, i32 0, i32 1
  %.unpack24 = load i32, ptr %.elt23, align 4
  %5 = shl i32 %.unpack24, 1
  %6 = insertelement <3 x i32> undef, i32 %5, i64 0
  %7 = sitofp <3 x i32> %6 to <3 x float>
  %8 = shufflevector <3 x float> %7, <3 x float> poison, <3 x i32> zeroinitializer
  store <3 x float> %8, ptr %splitgep, align 64
  %9 = add <3 x i32> %6, <i32 1, i32 undef, i32 undef>
  %10 = sitofp <3 x i32> %9 to <3 x float>
  %11 = shufflevector <3 x float> %10, <3 x float> poison, <3 x i32> zeroinitializer
  %12 = getelementptr i8, ptr %4, i32 76
  store <3 x float> %11, ptr %12, align 4
  ret i32 0
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) "frame-pointer"="all" "hot" "no-builtins" "nonlazybind" }
```

llc repro.ll -O3 -mattr=+m,+a,+f,+zvl512b,+zve32x -mtriple=riscv32 -target-abi=ilp32:
```
_iota_dim0_dispatch_0_generic_2x3:      # @_iota_dim0_dispatch_0_generic_2x3
# %bb.0:
	addi	sp, sp, -16
	sw	ra, 12(sp)                      # 4-byte Folded Spill
	sw	s0, 8(sp)                       # 4-byte Folded Spill
	addi	s0, sp, 16
	lw	a0, 28(a1)
	lw	a1, 4(a2)
	lw	a0, 0(a0)
	slli	a1, a1, 1
	fcvt.s.w	ft0, a1
	fsw	ft0, 72(a0)
	fsw	ft0, 68(a0)
	fsw	ft0, 64(a0)
	addi	a1, a1, 1
	fcvt.s.w	ft0, a1
	fsw	ft0, 84(a0)
	fsw	ft0, 80(a0)
	fsw	ft0, 76(a0)
	li	a0, 0
	addi	sp, s0, -16
	lw	ra, 12(sp)                      # 4-byte Folded Reload
	lw	s0, 8(sp)                       # 4-byte Folded Reload
	addi	sp, sp, 16
	ret
```

llc repro.ll -O3 -mattr=+m,+a,+f,+zvl512b,+v -mtriple=riscv64 -target-abi=lp64d:
```
_iota_dim0_dispatch_0_generic_2x3:      # @_iota_dim0_dispatch_0_generic_2x3
# %bb.0:
	addi	sp, sp, -16
	sd	ra, 8(sp)                       # 8-byte Folded Spill
	sd	s0, 0(sp)                       # 8-byte Folded Spill
	addi	s0, sp, 16
	lwu	a0, 36(a1)
	lwu	a1, 32(a1)
	slli	a0, a0, 32
	or	a0, a0, a1
	ld	a0, 0(a0)
	lw	a1, 4(a2)
	addi	a2, a0, 64
	slli	a1, a1, 1
	vsetivli	zero, 4, e32, mf2, ta, ma
	vmv.v.x	v8, a1
	vfcvt.f.x.v	v9, v8
	vrgather.vi	v10, v9, 0
	addi	a1, a0, 72
	vsetivli	zero, 1, e32, mf2, ta, ma
	vse32.v	v9, (a1)
	vse64.v	v10, (a2)
	vsetivli	zero, 4, e32, mf2, ta, ma
	vadd.vi	v8, v8, 1
	vfcvt.f.x.v	v8, v8
	vrgather.vi	v9, v8, 0
	addi	a1, a0, 84
	vsetivli	zero, 1, e64, m1, ta, ma
	vse32.v	v8, (a1)
	vmv.x.s	a1, v9
	sw	a1, 76(a0)
	srli	a1, a1, 32
	sw	a1, 80(a0)
	li	a0, 0
	addi	sp, s0, -16
	ld	ra, 8(sp)                       # 8-byte Folded Reload
	ld	s0, 0(sp)                       # 8-byte Folded Reload
	addi	sp, sp, 16
	ret
```

The `+zve32x` version produces the output `[0, 0, 1]`, which is not correct, and the `+v` (or `+zve32f`) version produces `[0, 1, 2]`, which is correct. I think this is another problem related to dealing with float vectors in zve32x mode.

_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to