================
@@ -0,0 +1,356 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -O1 -o - | FileCheck %s
----------------
raoanag wrote:

A couple of observations here:

I tested using a select statement, and it seems to be less optimal because it
always computes the full result before selecting the value to return, whereas
with an if statement we can return early.

Pasting an example using select:
`; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind 
willreturn memory(none)
define noundef nofpclass(nan inf) half @_Z17test_refract_halfDhDhDh(half 
noundef nofpclass(nan inf) %I, half noundef nofpclass(nan inf) %N, half noundef 
nofpclass(nan inf) %ETA) local_unnamed_addr #0 {
entry:
  %mul.i = fmul reassoc nnan ninf nsz arcp afn half %ETA, %ETA
  %0 = fmul reassoc nnan ninf nsz arcp afn half %N, %I
  %1 = fmul reassoc nnan ninf nsz arcp afn half %0, %0
  %sub.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %1
  %mul4.i = fmul reassoc nnan ninf nsz arcp afn half %mul.i, %sub.i
  %sub5.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %mul4.i
  %mul6.i = fmul reassoc nnan ninf nsz arcp afn half %ETA, %I
  %mul7.i = fmul reassoc nnan ninf nsz arcp afn half %N, %I
  %mul8.i = fmul reassoc nnan ninf nsz arcp afn half %mul7.i, %ETA
  %2 = tail call reassoc nnan ninf nsz arcp afn half @llvm.sqrt.f16(half 
%sub5.i)
  %add.i = fadd reassoc nnan ninf nsz arcp afn half %2, %mul8.i
  %mul9.i = fmul reassoc nnan ninf nsz arcp afn half %add.i, %N
  %sub10.i = fsub reassoc nnan ninf nsz arcp afn half %mul6.i, %mul9.i
  %cmp.i = fcmp reassoc nnan ninf nsz arcp afn olt half %sub5.i, 0xH0000
  %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %cmp.i, half 
0xH0000, half %sub10.i
  ret half %hlsl.select.i
}`

Regarding removing the `O1` flag: I would like to get more context behind not
using it, since dropping it seems to bloat the test code (output without `O1`
below):

`; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
define noundef nofpclass(nan inf) half @_Z17test_refract_halfDhDhDh(half 
noundef nofpclass(nan inf) %I, half noundef nofpclass(nan inf) %N, half noundef 
nofpclass(nan inf) %ETA) #0 {
entry:
  %I.addr.i1 = alloca half, align 2
  %N.addr.i2 = alloca half, align 2
  %Eta.addr.i = alloca half, align 2
  %K.i = alloca half, align 2
  %Result.i = alloca half, align 2
  %I.addr.i = alloca half, align 2
  %N.addr.i = alloca half, align 2
  %eta.addr.i = alloca half, align 2
  %I.addr = alloca half, align 2
  %N.addr = alloca half, align 2
  %ETA.addr = alloca half, align 2
  store half %I, ptr %I.addr, align 2
  store half %N, ptr %N.addr, align 2
  store half %ETA, ptr %ETA.addr, align 2
  %0 = load half, ptr %I.addr, align 2
  %1 = load half, ptr %N.addr, align 2
  %2 = load half, ptr %ETA.addr, align 2
  store half %0, ptr %I.addr.i, align 2
  store half %1, ptr %N.addr.i, align 2
  store half %2, ptr %eta.addr.i, align 2
  %3 = load half, ptr %I.addr.i, align 2
  %4 = load half, ptr %N.addr.i, align 2
  %5 = load half, ptr %eta.addr.i, align 2
  store half %3, ptr %I.addr.i1, align 2
  store half %4, ptr %N.addr.i2, align 2
  store half %5, ptr %Eta.addr.i, align 2
  %6 = load half, ptr %Eta.addr.i, align 2
  %7 = load half, ptr %Eta.addr.i, align 2
  %mul.i = fmul reassoc nnan ninf nsz arcp afn half %6, %7
  %8 = load half, ptr %N.addr.i2, align 2
  %9 = load half, ptr %I.addr.i1, align 2
  %mul1.i = fmul reassoc nnan ninf nsz arcp afn half %8, %9
  %10 = load half, ptr %N.addr.i2, align 2
  %mul2.i = fmul reassoc nnan ninf nsz arcp afn half %mul1.i, %10
  %11 = load half, ptr %I.addr.i1, align 2
  %mul3.i = fmul reassoc nnan ninf nsz arcp afn half %mul2.i, %11
  %sub.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %mul3.i
  %mul4.i = fmul reassoc nnan ninf nsz arcp afn half %mul.i, %sub.i
  %sub5.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %mul4.i
  store half %sub5.i, ptr %K.i, align 2
  %12 = load half, ptr %Eta.addr.i, align 2
  %13 = load half, ptr %I.addr.i1, align 2
  %mul6.i = fmul reassoc nnan ninf nsz arcp afn half %12, %13
  %14 = load half, ptr %Eta.addr.i, align 2
  %15 = load half, ptr %N.addr.i2, align 2
  %mul7.i = fmul reassoc nnan ninf nsz arcp afn half %14, %15
  %16 = load half, ptr %I.addr.i1, align 2
  %mul8.i = fmul reassoc nnan ninf nsz arcp afn half %mul7.i, %16
  %17 = load half, ptr %K.i, align 2
  %18 = call reassoc nnan ninf nsz arcp afn half @llvm.sqrt.f16(half %17)
  %add.i = fadd reassoc nnan ninf nsz arcp afn half %mul8.i, %18
  %19 = load half, ptr %N.addr.i2, align 2
  %mul9.i = fmul reassoc nnan ninf nsz arcp afn half %add.i, %19
  %sub10.i = fsub reassoc nnan ninf nsz arcp afn half %mul6.i, %mul9.i
  store half %sub10.i, ptr %Result.i, align 2
  %20 = load half, ptr %K.i, align 2
  %cmp.i = fcmp reassoc nnan ninf nsz arcp afn olt half %20, 0xH0000
  %21 = load half, ptr %Result.i, align 2
  %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %cmp.i, half 
0xH0000, half %21
  ret half %hlsl.select.i
}`

https://github.com/llvm/llvm-project/pull/136026
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to