================ @@ -0,0 +1,356 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -O1 -o - | FileCheck %s ---------------- raoanag wrote:
A couple of observations here. I tested using a select statement, and it seems to be less optimal because it performs the full computation and then selects the returned value, whereas with an `if` statement we can return early. Pasting an example using select: `; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none) define noundef nofpclass(nan inf) half @_Z17test_refract_halfDhDhDh(half noundef nofpclass(nan inf) %I, half noundef nofpclass(nan inf) %N, half noundef nofpclass(nan inf) %ETA) local_unnamed_addr #0 { entry: %mul.i = fmul reassoc nnan ninf nsz arcp afn half %ETA, %ETA %0 = fmul reassoc nnan ninf nsz arcp afn half %N, %I %1 = fmul reassoc nnan ninf nsz arcp afn half %0, %0 %sub.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %1 %mul4.i = fmul reassoc nnan ninf nsz arcp afn half %mul.i, %sub.i %sub5.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %mul4.i %mul6.i = fmul reassoc nnan ninf nsz arcp afn half %ETA, %I %mul7.i = fmul reassoc nnan ninf nsz arcp afn half %N, %I %mul8.i = fmul reassoc nnan ninf nsz arcp afn half %mul7.i, %ETA %2 = tail call reassoc nnan ninf nsz arcp afn half @llvm.sqrt.f16(half %sub5.i) %add.i = fadd reassoc nnan ninf nsz arcp afn half %2, %mul8.i %mul9.i = fmul reassoc nnan ninf nsz arcp afn half %add.i, %N %sub10.i = fsub reassoc nnan ninf nsz arcp afn half %mul6.i, %mul9.i %cmp.i = fcmp reassoc nnan ninf nsz arcp afn olt half %sub5.i, 0xH0000 %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %cmp.i, half 0xH0000, half %sub10.i ret half %hlsl.select.i }` Regarding removing the `O1` flag: I would like to get more context behind not using it, since omitting it seems to bloat the test code: `; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind define noundef nofpclass(nan inf) half @_Z17test_refract_halfDhDhDh(half noundef nofpclass(nan inf) %I, half noundef nofpclass(nan inf) %N, half noundef nofpclass(nan inf) %ETA) #0 { entry: %I.addr.i1 = alloca half, align 2 %N.addr.i2 = alloca half, align 2 %Eta.addr.i = 
alloca half, align 2 %K.i = alloca half, align 2 %Result.i = alloca half, align 2 %I.addr.i = alloca half, align 2 %N.addr.i = alloca half, align 2 %eta.addr.i = alloca half, align 2 %I.addr = alloca half, align 2 %N.addr = alloca half, align 2 %ETA.addr = alloca half, align 2 store half %I, ptr %I.addr, align 2 store half %N, ptr %N.addr, align 2 store half %ETA, ptr %ETA.addr, align 2 %0 = load half, ptr %I.addr, align 2 %1 = load half, ptr %N.addr, align 2 %2 = load half, ptr %ETA.addr, align 2 store half %0, ptr %I.addr.i, align 2 store half %1, ptr %N.addr.i, align 2 store half %2, ptr %eta.addr.i, align 2 %3 = load half, ptr %I.addr.i, align 2 %4 = load half, ptr %N.addr.i, align 2 %5 = load half, ptr %eta.addr.i, align 2 store half %3, ptr %I.addr.i1, align 2 store half %4, ptr %N.addr.i2, align 2 store half %5, ptr %Eta.addr.i, align 2 %6 = load half, ptr %Eta.addr.i, align 2 %7 = load half, ptr %Eta.addr.i, align 2 %mul.i = fmul reassoc nnan ninf nsz arcp afn half %6, %7 %8 = load half, ptr %N.addr.i2, align 2 %9 = load half, ptr %I.addr.i1, align 2 %mul1.i = fmul reassoc nnan ninf nsz arcp afn half %8, %9 %10 = load half, ptr %N.addr.i2, align 2 %mul2.i = fmul reassoc nnan ninf nsz arcp afn half %mul1.i, %10 %11 = load half, ptr %I.addr.i1, align 2 %mul3.i = fmul reassoc nnan ninf nsz arcp afn half %mul2.i, %11 %sub.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %mul3.i %mul4.i = fmul reassoc nnan ninf nsz arcp afn half %mul.i, %sub.i %sub5.i = fsub reassoc nnan ninf nsz arcp afn half 0xH3C00, %mul4.i store half %sub5.i, ptr %K.i, align 2 %12 = load half, ptr %Eta.addr.i, align 2 %13 = load half, ptr %I.addr.i1, align 2 %mul6.i = fmul reassoc nnan ninf nsz arcp afn half %12, %13 %14 = load half, ptr %Eta.addr.i, align 2 %15 = load half, ptr %N.addr.i2, align 2 %mul7.i = fmul reassoc nnan ninf nsz arcp afn half %14, %15 %16 = load half, ptr %I.addr.i1, align 2 %mul8.i = fmul reassoc nnan ninf nsz arcp afn half %mul7.i, %16 %17 = load half, ptr %K.i, 
align 2 %18 = call reassoc nnan ninf nsz arcp afn half @llvm.sqrt.f16(half %17) %add.i = fadd reassoc nnan ninf nsz arcp afn half %mul8.i, %18 %19 = load half, ptr %N.addr.i2, align 2 %mul9.i = fmul reassoc nnan ninf nsz arcp afn half %add.i, %19 %sub10.i = fsub reassoc nnan ninf nsz arcp afn half %mul6.i, %mul9.i store half %sub10.i, ptr %Result.i, align 2 %20 = load half, ptr %K.i, align 2 %cmp.i = fcmp reassoc nnan ninf nsz arcp afn olt half %20, 0xH0000 %21 = load half, ptr %Result.i, align 2 %hlsl.select.i = select reassoc nnan ninf nsz arcp afn i1 %cmp.i, half 0xH0000, half %21 ret half %hlsl.select.i }` https://github.com/llvm/llvm-project/pull/136026 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits