We were discussing on IRC that the GLSL compiler implements the sqrt
function somewhat inefficiently. Instead of the rsq+rcp+cmp instructions used
in the original code, the proposed patch uses just rsq+mul. Please see the
patch log for further explanation, and please review.

-Marek
From 9b834a79a1819f3b4b9868be3e2696667791c83e Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Marek=20Ol=C5=A1=C3=A1k?= <mar...@gmail.com>
Date: Sat, 27 Mar 2010 13:49:09 +0100
Subject: [PATCH] glsl: optimize sqrt

The new version can be derived from sqrt as follows (for x > 0):

sqrt(x) =
sqrt(x)^2 / sqrt(x) =
x / sqrt(x) =
x * rsqrt(x)

Also the need for the CMP instruction is gone because there is no division
by zero.
---
 .../shader/slang/library/slang_common_builtin.gc   |   22 +++----------------
 1 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc
index a25ca55..3f6596c 100644
--- a/src/mesa/shader/slang/library/slang_common_builtin.gc
+++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
@@ -602,50 +602,36 @@ vec4 exp2(const vec4 a)
 
 float sqrt(const float x)
 {
-   const float nx = -x;
    float r;
    __asm float_rsq r, x;
-   __asm float_rcp r, r;
-   __asm vec4_cmp __retVal, nx, r, 0.0;
+   __retVal = r * x;
 }
 
 vec2 sqrt(const vec2 x)
 {
-   const vec2 nx = -x, zero = vec2(0.0);
    vec2 r;
    __asm float_rsq r.x, x.x;
    __asm float_rsq r.y, x.y;
-   __asm float_rcp r.x, r.x;
-   __asm float_rcp r.y, r.y;
-   __asm vec4_cmp __retVal, nx, r, zero;
+   __retVal = r * x;
 }
 
 vec3 sqrt(const vec3 x)
 {
-   const vec3 nx = -x, zero = vec3(0.0);
    vec3 r;
    __asm float_rsq r.x, x.x;
    __asm float_rsq r.y, x.y;
    __asm float_rsq r.z, x.z;
-   __asm float_rcp r.x, r.x;
-   __asm float_rcp r.y, r.y;
-   __asm float_rcp r.z, r.z;
-   __asm vec4_cmp __retVal, nx, r, zero;
+   __retVal = r * x;
 }
 
 vec4 sqrt(const vec4 x)
 {
-   const vec4 nx = -x, zero = vec4(0.0);
    vec4 r;
    __asm float_rsq r.x, x.x;
    __asm float_rsq r.y, x.y;
    __asm float_rsq r.z, x.z;
    __asm float_rsq r.w, x.w;
-   __asm float_rcp r.x, r.x;
-   __asm float_rcp r.y, r.y;
-   __asm float_rcp r.z, r.z;
-   __asm float_rcp r.w, r.w;
-   __asm vec4_cmp __retVal, nx, r, zero;
+   __retVal = r * x;
 }
 
 
-- 
1.6.3.3

------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to