We were talking on IRC about how the GLSL compiler implements the sqrt
function somewhat inefficiently. Instead of the rsq+rcp+cmp instruction
sequence used in the original code, the proposed patch uses just rsq+mul.
Please see the patch log for further explanation, and please review.
-Marek
From 9b834a79a1819f3b4b9868be3e2696667791c83e Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Marek=20Ol=C5=A1=C3=A1k?= <mar...@gmail.com>
Date: Sat, 27 Mar 2010 13:49:09 +0100
Subject: [PATCH] glsl: optimize sqrt
The new version can be derived from sqrt as follows:
sqrt(x) =
sqrt(x)^2 / sqrt(x) =
x / sqrt(x) =
x * rsqrt(x)
Also, the CMP instruction is no longer needed because there is no division
by zero.
---
.../shader/slang/library/slang_common_builtin.gc | 22 +++----------------
1 files changed, 4 insertions(+), 18 deletions(-)
diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc
index a25ca55..3f6596c 100644
--- a/src/mesa/shader/slang/library/slang_common_builtin.gc
+++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
@@ -602,50 +602,36 @@ vec4 exp2(const vec4 a)
float sqrt(const float x)
{
- const float nx = -x;
float r;
__asm float_rsq r, x;
- __asm float_rcp r, r;
- __asm vec4_cmp __retVal, nx, r, 0.0;
+ __retVal = r * x;
}
vec2 sqrt(const vec2 x)
{
- const vec2 nx = -x, zero = vec2(0.0);
vec2 r;
__asm float_rsq r.x, x.x;
__asm float_rsq r.y, x.y;
- __asm float_rcp r.x, r.x;
- __asm float_rcp r.y, r.y;
- __asm vec4_cmp __retVal, nx, r, zero;
+ __retVal = r * x;
}
vec3 sqrt(const vec3 x)
{
- const vec3 nx = -x, zero = vec3(0.0);
vec3 r;
__asm float_rsq r.x, x.x;
__asm float_rsq r.y, x.y;
__asm float_rsq r.z, x.z;
- __asm float_rcp r.x, r.x;
- __asm float_rcp r.y, r.y;
- __asm float_rcp r.z, r.z;
- __asm vec4_cmp __retVal, nx, r, zero;
+ __retVal = r * x;
}
vec4 sqrt(const vec4 x)
{
- const vec4 nx = -x, zero = vec4(0.0);
vec4 r;
__asm float_rsq r.x, x.x;
__asm float_rsq r.y, x.y;
__asm float_rsq r.z, x.z;
__asm float_rsq r.w, x.w;
- __asm float_rcp r.x, r.x;
- __asm float_rcp r.y, r.y;
- __asm float_rcp r.z, r.z;
- __asm float_rcp r.w, r.w;
- __asm vec4_cmp __retVal, nx, r, zero;
+ __retVal = r * x;
}
--
1.6.3.3
------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev