Author: post
Date: 2010-06-25 17:16:14 +0200 (Fri, 25 Jun 2010)
New Revision: 3444
Modified:
trunk/plugins/dcp/dcp-sse2.c
trunk/plugins/dcp/dcp-sse4.c
Log:
Use a faster, approximate square root (_mm_rcp_ps(_mm_rsqrt_ps(x)) instead of
_mm_sqrt_ps(x)) for contrast. Minor speedup on Core 2, much faster on Athlon
and older Intel CPUs.
Modified: trunk/plugins/dcp/dcp-sse2.c
===================================================================
--- trunk/plugins/dcp/dcp-sse2.c 2010-06-25 15:00:46 UTC (rev 3443)
+++ trunk/plugins/dcp/dcp-sse2.c 2010-06-25 15:16:14 UTC (rev 3444)
@@ -789,9 +789,9 @@
r = _mm_max_ps(r, min_val);
g = _mm_max_ps(g, min_val);
b = _mm_max_ps(b, min_val);
- r = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_sqrt_ps(r), contr_base)), contr_base);
- g = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_sqrt_ps(g), contr_base)), contr_base);
- b = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_sqrt_ps(b), contr_base)), contr_base);
+ r = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(r)), contr_base)), contr_base);
+ g = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(g)), contr_base)), contr_base);
+ b = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(b)), contr_base)), contr_base);
r = _mm_max_ps(r, min_val);
g = _mm_max_ps(g, min_val);
b = _mm_max_ps(b, min_val);
Modified: trunk/plugins/dcp/dcp-sse4.c
===================================================================
--- trunk/plugins/dcp/dcp-sse4.c 2010-06-25 15:00:46 UTC (rev 3443)
+++ trunk/plugins/dcp/dcp-sse4.c 2010-06-25 15:16:14 UTC (rev 3444)
@@ -668,9 +668,9 @@
r = _mm_max_ps(r, min_val);
g = _mm_max_ps(g, min_val);
b = _mm_max_ps(b, min_val);
- r = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_sqrt_ps(r), contr_base)), contr_base);
- g = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_sqrt_ps(g), contr_base)), contr_base);
- b = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_sqrt_ps(b), contr_base)), contr_base);
+ r = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(r)), contr_base)), contr_base);
+ g = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(g)), contr_base)), contr_base);
+ b = _mm_add_ps(_mm_mul_ps(contrast,
_mm_sub_ps(_mm_rcp_ps(_mm_rsqrt_ps(b)), contr_base)), contr_base);
r = _mm_max_ps(r, min_val);
g = _mm_max_ps(g, min_val);
b = _mm_max_ps(b, min_val);
_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit