Author: post
Date: 2010-03-07 21:40:49 +0100 (Sun, 07 Mar 2010)
New Revision: 201

Modified:
   RawSpeed/RawImage.cpp
Log:
Enable SSE2 intrinsics for value scaling on 64-bit GCC.

Modified: RawSpeed/RawImage.cpp
===================================================================
--- RawSpeed/RawImage.cpp       2010-03-07 15:40:09 UTC (rev 200)
+++ RawSpeed/RawImage.cpp       2010-03-07 20:40:49 UTC (rev 201)
@@ -22,6 +22,10 @@
 
     http://www.klauspost.com
 */
+#if defined(__SSE2__)
+#include <emmintrin.h>
+#endif
+
 namespace RawSpeed {
 
 RawImageData::RawImageData(void):
@@ -131,14 +135,20 @@
   scaleValues(f);
 }
 
-#if _MSC_VER > 1399
+#if _MSC_VER > 1399 || defined(__SSE2__)
 
 void RawImageData::scaleValues(float f) {
+  gboolean use_sse2;
+#ifdef _MSC_VER 
   int info[4];
   __cpuid(info, 1);
+  use_sse2 = !!info[3]&(1 << 26)
+#else
+  use_sse2 = TRUE;
+#endif
 
   // Check SSE2
-  if (f >= 0.0f && info[3]&(1 << 26)) {
+  if (f >= 0.0f && use_sse2) {
 
     __m128i ssescale;
     __m128i ssesub;
@@ -202,84 +212,19 @@
 #else
 
 void RawImageData::scaleValues(float f) {
-#if 0
-  //TODO: Check for SSE2 on 32 bit systems and use it there
-  guint temp[20];
-
-  guint i = (int)(1024.0f * f);  // 10 bit fraction
-  i |= i << 16;
-  guint b = blackLevel | (blackLevel << 16);
-
-  for (int j = 0; j < 4; j++) {
-    temp[j] = b;
-    temp[j+4] = i;
-    temp[j+8] = 512;
-    temp[j+12] = 32768;
-    temp[j+16] = 0x80008000;
-  }
-
-  asm volatile
-  (
-    "movdqu 0(%0), %%xmm7\n"     // Subtraction
-    "movdqu 16(%0), %%xmm6\n"    // Multiplication factor
-    "movdqu 32(%0), %%xmm5\n"    // Fraction
-    "movdqu 48(%0), %%xmm4\n"    // Sub 32768
-    "movdqu 64(%0), %%xmm3\n"    // Sign shift
-  : // no output registers
-  : "r"(temp)
-        : //  %0
-      );
-
+  gint gw = dim.x * cpp;
+  int scale = (int)(16384.0f * f);  // 14 bit fraction
   for (int y = 0; y < dim.y; y++) {
-    guchar* pixel = (guchar*) & data[(mOffset.y+y)*pitch];
-    guint gw = pitch >> 4;
-    for (guint x  = 0; x < gw ; x++) {
-      asm volatile(
-        "next_pixel:\n"
-        "movaps 0(%0), %%xmm0\n"
-        "psubusw %%xmm7, %%xmm0\n"  // Subtract black
-        "movaps %%xmm0, %%xmm1\n"
-        "pmullw %%xmm6, %%xmm0\n"
-        "pmulhuw %%xmm6, %%xmm1\n"
-        "movaps %%xmm0, %%xmm2\n"
-        "punpcklwd %%xmm1, %%xmm0\n" // First 4 result
-        "punpckhwd %%xmm1, %%xmm2\n" // Last 4 result
-        "paddd %%xmm5, %%xmm0\n"      // Add fraction
-        "paddd %%xmm5, %%xmm2\n"
-        "psrad $10, %%xmm0\n"
-        "psrad $10, %%xmm2\n"
-        "psubd %%xmm4, %%xmm0\n"      // Avoid saturation
-        "psubd %%xmm4, %%xmm2\n"
-        "packssdw %%xmm2, %%xmm0\n"
-        "pxor %%xmm3, %%xmm0\n"       // Shift sign
-        "movaps %%xmm0, 0(%0)\n"
-
-        "add $16, %0\n"
-      : // no output registers
-      : "r"(pixel)
-            :  // %0
-          );
+    gushort *pixel = (gushort*)getData(0, y);
+    for (int x = 0 ; x < gw; x++) {
+      pixel[x] = clampbits(((pixel[x] - blackLevel) * scale + 8192) >> 14, 16);
     }
   }
-
-#else
-
-gint gw = dim.x * cpp;
-int scale = (int)(16384.0f * f);  // 14 bit fraction
-for (int y = 0; y < dim.y; y++) {
-  gushort *pixel = (gushort*)getData(0, y);
-  for (int x = 0 ; x < gw; x++) {
-    pixel[x] = clampbits(((pixel[x] - blackLevel) * scale + 8192) >> 14, 16);
-  }
 }
 
 #endif
 
-}
 
-#endif
-
-
 RawImage::RawImage(RawImageData* p) : p_(p) {
   pthread_mutex_lock(&p_->mymutex);
   ++p_->dataRefCount;


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to