Author: post
Date: 2009-09-09 23:49:08 +0200 (Wed, 09 Sep 2009)
New Revision: 148

Modified:
   RawSpeed/RawImage.cpp
Log:
Add GCC 64-bit SSE2 scaling for black/white.
Disable the Win32 assembler path since it doesn't work properly.

Modified: RawSpeed/RawImage.cpp
===================================================================
--- RawSpeed/RawImage.cpp       2009-09-08 18:30:51 UTC (rev 147)
+++ RawSpeed/RawImage.cpp       2009-09-09 21:49:08 UTC (rev 148)
@@ -1,7 +1,7 @@
 #include "StdAfx.h"
 #include "RawImage.h"
 #include "RawDecoder.h"  // For exceptions
-/* 
+/*
     RawSpeed - RAW file decoder.
 
     Copyright (C) 2009 Klaus Post
@@ -23,18 +23,18 @@
     http://www.klauspost.com
 */
 
-RawImageData::RawImageData(void): 
-dim(0,0), bpp(0), isCFA(true),  
-blackLevel(-1), whitePoint(65536), 
+RawImageData::RawImageData(void):
+dim(0,0), bpp(0), isCFA(true),
+blackLevel(-1), whitePoint(65536),
 dataRefCount(0), data(0), cpp(1)
 {
   pthread_mutex_init(&mymutex, NULL);
   subsampling.x = subsampling.y = 1;
 }
 
-RawImageData::RawImageData(iPoint2D _dim, guint _bpc, guint _cpp) : 
-dim(_dim), bpp(_bpc), 
-blackLevel(-1), whitePoint(65536), 
+RawImageData::RawImageData(iPoint2D _dim, guint _bpc, guint _cpp) :
+dim(_dim), bpp(_bpc),
+blackLevel(-1), whitePoint(65536),
 dataRefCount(0),data(0), cpp(cpp)
 {
   subsampling.x = subsampling.y = 1;
@@ -139,25 +139,30 @@
   scaleValues(f);
 }
 
-#if _MSC_VER > 1399
+#if _MSC_VER > 13990
 
 void RawImageData::scaleValues(float f) {
   int info[4];
   __cpuid(info,1);
 
   // Check SSE2
-  if (f >= 0.0f && info[3]&(1<<26)) { 
-  
+  if (f >= 0.0f && info[3]&(1<<26)) {
+
     __m128i ssescale;
+    __m128i ssesub;
     guint gw = pitch / 16;
     guint i = (int)(65536.0f*f);  // 16 bit fraction
     i |= i<<16;
+    guint b = blackLevel | (blackLevel<<16);
+
     ssescale = _mm_set_epi32(i,i,i,i);
+    ssesub = _mm_set_epi32(b,b,b,b);
 
     for (int y = 0; y < dim.y; y++) {
       __m128i* pixel = (__m128i*)&data[(mOffset.y+y)*pitch];
       for (guint x = 0 ; x < gw; x++) {
         __m128i pix = _mm_load_si128(pixel);
+        pix = _mm_subs_epu16(pix, ssesub);
         pix = _mm_mulhi_epu16(pix, ssescale);
         _mm_store_si128(pixel, pix);
         pixel++;
@@ -179,14 +184,79 @@
 #else
 
 void RawImageData::scaleValues(float f) {
+#if defined (__x86_64__)
+  //TODO: Check for SSE2 on 32 bit systems and use it there
+  guint temp[20];
+
+  guint i = (int)(1024.0f*f);  // 10 bit fraction
+  i |= i<<16;
+  guint b = blackLevel | (blackLevel<<16);
+
+  for (int j = 0; j < 4; j++) {
+    temp[j] = b;
+    temp[j+4] = i;
+    temp[j+8] = 512;
+    temp[j+12] = 32768;
+    temp[j+16] = 0x80008000;
+  }
+
+  asm volatile
+      (
+       "movdqu 0(%0), %%xmm7\n"     // Subtraction
+       "movdqu 16(%0), %%xmm6\n"    // Multiplication factor
+       "movdqu 32(%0), %%xmm5\n"    // Fraction
+       "movdqu 48(%0), %%xmm4\n"    // Sub 32768
+       "movdqu 64(%0), %%xmm3\n"    // Sign shift
+    : // no output registers
+    : "r" (temp)
+    : //  %0
+      );
+
+  for (int y = 0; y < dim.y; y++) {
+    guchar* pixel = (guchar*)&data[(mOffset.y+y)*pitch];
+    guint gw = pitch >> 4;
+    for (guint x  = 0; x < gw ; x++) {
+      asm volatile (
+        "next_pixel:\n"
+        "movaps 0(%0), %%xmm0\n"
+        "psubusw %%xmm7, %%xmm0\n"  // Subtract black
+        "movaps %%xmm0, %%xmm1\n"
+        "pmullw %%xmm6, %%xmm0\n"
+        "pmulhuw %%xmm6, %%xmm1\n"
+        "movaps %%xmm0, %%xmm2\n"
+        "punpcklwd %%xmm1, %%xmm0\n" // First 4 result
+        "punpckhwd %%xmm1, %%xmm2\n" // Last 4 result
+        "paddd %%xmm5, %%xmm0\n"      // Add fraction
+        "paddd %%xmm5, %%xmm2\n"
+        "psrad $10, %%xmm0\n"
+        "psrad $10, %%xmm2\n"
+        "psubd %%xmm4, %%xmm0\n"      // Avoid saturation
+        "psubd %%xmm4, %%xmm2\n"
+        "packssdw %%xmm2, %%xmm0\n"
+        "pxor %%xmm3, %%xmm0\n"       // Shift sign
+        "movaps %%xmm0, 0(%0)\n"
+
+        "add $16, %0\n"
+      : // no output registers
+      : "r" (pixel)
+      :  // %0    
+      );
+    }
+  }
+
+#else
+
   gint gw = dim.x*cpp;
-  int scale = (int)(16384.0f*f);  // 14 bit fraction
+    int scale = (int)(16384.0f*f);  // 14 bit fraction
   for (int y = 0; y < dim.y; y++) {
     gushort *pixel = (gushort*)getData(0,y);
     for (int x = 0 ; x < gw; x++) {
-      pixel[x] = clampbits(((pixel[x]-blackLevel)*scale+8192)>>14,16);
+        pixel[x] = clampbits(((pixel[x]-blackLevel)*scale+8192)>>14,16);
     }
   }
+
+#endif
+
 }
 
 #endif


_______________________________________________
Rawstudio-commit mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-commit

Reply via email to