On 03/04/16 16:20, Roland Scheidegger wrote:
Am 03.04.2016 um 12:24 schrieb Jose Fonseca:
We could unconditionally use these intrinsics, but performance with SSE2
would suck, as LLVM falls back to calling libm.
Would that really work now? Last time I checked, calls out to libm from
the jitted code weren't just slow, they simply crashed.

It worked on the specific LLVM version I tried, on Linux. I didn't test exhaustively across all LLVM versions, nor on Windows.

Keeping fallback solutions for non-SSE4.1 cases is not onerous, and I think we should do it for as long as LLVM doesn't emit better code for these out of the box. Not so much for SSE2's sake (as one day those systems will go out of use), but because if someday somebody tries to bring up llvmpipe on a different ISA, falling back to scalar C runtime calls for these would kill performance big time, as they are needed for texture sampling.

Jose


lp_test_arit.
---
  src/gallium/auxiliary/gallivm/lp_bld_arit.c | 46 ++++++++++++++++++++++++-----
  1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c 
b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 01c6ba9..1277743 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1675,13 +1675,13 @@ enum lp_build_round_mode
   * result is the even value.  That is, rounding 2.5 will be 2.0, and not 3.0.
   */
  static inline LLVMValueRef
-lp_build_round_sse41(struct lp_build_context *bld,
-                     LLVMValueRef a,
-                     enum lp_build_round_mode mode)
+lp_build_nearest_sse41(struct lp_build_context *bld,
+                       LLVMValueRef a)
  {
     LLVMBuilderRef builder = bld->gallivm->builder;
     const struct lp_type type = bld->type;
     LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+   LLVMValueRef mode = LLVMConstNull(i32t);
     const char *intrinsic;
     LLVMValueRef res;

@@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld,

        args[0] = undef;
        args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
-      args[2] = LLVMConstInt(i32t, mode, 0);
+      args[2] = mode;

        res = lp_build_intrinsic(builder, intrinsic,
                                 vec_type, args, Elements(args), 0);
@@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld,

        res = lp_build_intrinsic_binary(builder, intrinsic,
                                        bld->vec_type, a,
-                                      LLVMConstInt(i32t, mode, 0));
+                                      mode);
     }

     return res;
@@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld,
                      LLVMValueRef a,
                      enum lp_build_round_mode mode)
  {
-   if (util_cpu_caps.has_sse4_1)
-     return lp_build_round_sse41(bld, a, mode);
+   if (util_cpu_caps.has_sse4_1) {
+      LLVMBuilderRef builder = bld->gallivm->builder;
+      const struct lp_type type = bld->type;
+      const char *intrinsic_root;
+      char intrinsic[32];
+
+      assert(type.floating);
+      assert(lp_check_value(type, a));
+      (void)type;
+
+      switch (mode) {
+      case LP_BUILD_ROUND_NEAREST:
+         if (HAVE_LLVM >= 0x0304) {
+            intrinsic_root = "llvm.round";
+         } else {
+            return lp_build_nearest_sse41(bld, a);
+         }
+         break;
+      case LP_BUILD_ROUND_FLOOR:
+         intrinsic_root = "llvm.floor";
+         break;
+      case LP_BUILD_ROUND_CEIL:
+         intrinsic_root = "llvm.ceil";
+         break;
+      case LP_BUILD_ROUND_TRUNCATE:
+         intrinsic_root = "llvm.trunc";
+         break;
+      }
+
+      util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
+                    intrinsic_root, type.length, type.width);
+
+      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   }
     else /* (util_cpu_caps.has_altivec) */
       return lp_build_round_altivec(bld, a, mode);
  }


Looks good to me. I'd guess it would work for altivec too, though it's
better to let someone handle that who can test it :-).
Those were all new in llvm 3.3, too bad they forgot the round...


Reviewed-by: Roland Scheidegger <[email protected]>


_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to