Title: [121900] trunk/Source/WebCore
Revision
121900
Author
rga...@webkit.org
Date
2012-07-05 05:31:56 -0700 (Thu, 05 Jul 2012)

Log Message

NEON intrinsics should be used with gaussian blur filter
https://bugs.webkit.org/show_bug.cgi?id=90166

Reviewed by Zoltan Herczeg.

Rewrite inline assembly to NEON intrinsics for better portability
and readability. Remove unnecessary FEGaussianBlurNEON.cpp and add
NEONHelpers.h to the project, which will contain the shared
NEON code of the filters.

Existing tests cover this issue.

* CMakeLists.txt:
* GNUmakefile.list.am:
* Target.pri:
* WebCore.gypi:
* WebCore.vcproj/WebCore.vcproj:
* WebCore.xcodeproj/project.pbxproj:
* platform/graphics/filters/FEGaussianBlur.cpp:
(WebCore::FEGaussianBlur::platformApplyGeneric):
(WebCore::FEGaussianBlur::platformApplyWorker):
* platform/graphics/filters/FEGaussianBlur.h:
(FEGaussianBlur):
* platform/graphics/filters/arm/FEGaussianBlurNEON.cpp: Removed.
* platform/graphics/filters/arm/FEGaussianBlurNEON.h:
(WebCore::boxBlurNEON):
* platform/graphics/filters/arm/NEONHelpers.h: Added.
(WebCore):
(WebCore::loadRGBA8AsFloat):
(WebCore::storeFloatAsRGBA8):

Modified Paths

Added Paths

Removed Paths

Diff

Modified: trunk/Source/WebCore/CMakeLists.txt (121899 => 121900)


--- trunk/Source/WebCore/CMakeLists.txt	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/CMakeLists.txt	2012-07-05 12:31:56 UTC (rev 121900)
@@ -1239,8 +1239,8 @@
     platform/graphics/filters/SourceAlpha.cpp
     platform/graphics/filters/SourceGraphic.cpp
 
+    platform/graphics/filters/arm/NEONHelpers.h
     platform/graphics/filters/arm/FECompositeArithmeticNEON.h
-    platform/graphics/filters/arm/FEGaussianBlurNEON.cpp
     platform/graphics/filters/arm/FEGaussianBlurNEON.h
     platform/graphics/filters/arm/FELightingNEON.cpp
     platform/graphics/filters/arm/FELightingNEON.h

Modified: trunk/Source/WebCore/ChangeLog (121899 => 121900)


--- trunk/Source/WebCore/ChangeLog	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/ChangeLog	2012-07-05 12:31:56 UTC (rev 121900)
@@ -1,3 +1,36 @@
+2012-07-05  Gabor Rapcsanyi  <rga...@webkit.org>
+
+        NEON intrinsics should be used with gaussian blur filter
+        https://bugs.webkit.org/show_bug.cgi?id=90166
+
+        Reviewed by Zoltan Herczeg.
+
+        Rewrite inline assembly to NEON intrinsics for better portability
+        and readability. Remove unnecessary FEGaussianBlurNEON.cpp and add
+        NEONHelpers.h to the project, which will contain the shared
+        NEON code of the filters.
+
+        Existing tests cover this issue.
+
+        * CMakeLists.txt:
+        * GNUmakefile.list.am:
+        * Target.pri:
+        * WebCore.gypi:
+        * WebCore.vcproj/WebCore.vcproj:
+        * WebCore.xcodeproj/project.pbxproj:
+        * platform/graphics/filters/FEGaussianBlur.cpp:
+        (WebCore::FEGaussianBlur::platformApplyGeneric):
+        (WebCore::FEGaussianBlur::platformApplyWorker):
+        * platform/graphics/filters/FEGaussianBlur.h:
+        (FEGaussianBlur):
+        * platform/graphics/filters/arm/FEGaussianBlurNEON.cpp: Removed.
+        * platform/graphics/filters/arm/FEGaussianBlurNEON.h:
+        (WebCore::boxBlurNEON):
+        * platform/graphics/filters/arm/NEONHelpers.h: Added.
+        (WebCore):
+        (WebCore::loadRGBA8AsFloat):
+        (WebCore::storeFloatAsRGBA8):
+
 2012-07-05  Sam D  <dsam2...@gmail.com>
 
         Web Inspector: Having a "Scroll into view" for nodes through web inspector.

Modified: trunk/Source/WebCore/GNUmakefile.list.am (121899 => 121900)


--- trunk/Source/WebCore/GNUmakefile.list.am	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/GNUmakefile.list.am	2012-07-05 12:31:56 UTC (rev 121900)
@@ -3284,8 +3284,8 @@
 	Source/WebCore/platform/graphics/filters/SourceGraphic.h \
 	Source/WebCore/platform/graphics/filters/SpotLightSource.cpp \
 	Source/WebCore/platform/graphics/filters/SpotLightSource.h \
+	Source/WebCore/platform/graphics/filters/arm/NEONHelpers.h \
 	Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h \
-	Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.cpp \
 	Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h \
 	Source/WebCore/platform/graphics/filters/arm/FELightingNEON.cpp \
 	Source/WebCore/platform/graphics/filters/arm/FELightingNEON.h \

Modified: trunk/Source/WebCore/Target.pri (121899 => 121900)


--- trunk/Source/WebCore/Target.pri	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/Target.pri	2012-07-05 12:31:56 UTC (rev 121900)
@@ -2271,6 +2271,7 @@
     platform/graphics/filters/LightSource.h \
     platform/graphics/filters/SourceAlpha.h \
     platform/graphics/filters/SourceGraphic.h \
+    platform/graphics/filters/arm/NEONHelpers.h \
     platform/graphics/filters/arm/FECompositeArithmeticNEON.h \
     platform/graphics/filters/arm/FEGaussianBlurNEON.h \
     platform/graphics/filters/arm/FELightingNEON.h \
@@ -3473,7 +3474,6 @@
         platform/graphics/filters/SourceAlpha.cpp \
         platform/graphics/filters/SourceGraphic.cpp \
         platform/graphics/filters/arm/FELightingNEON.cpp \
-        platform/graphics/filters/arm/FEGaussianBlurNEON.cpp \
 }
 
 contains(DEFINES, ENABLE_MATHML=1) {

Modified: trunk/Source/WebCore/WebCore.gypi (121899 => 121900)


--- trunk/Source/WebCore/WebCore.gypi	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/WebCore.gypi	2012-07-05 12:31:56 UTC (rev 121900)
@@ -3707,8 +3707,8 @@
             'platform/graphics/filters/SourceGraphic.h',
             'platform/graphics/filters/SpotLightSource.cpp',
             'platform/graphics/filters/SpotLightSource.h',
+            'platform/graphics/filters/arm/NEONHelpers.h',
             'platform/graphics/filters/arm/FECompositeArithmeticNEON.h',
-            'platform/graphics/filters/arm/FEGaussianBlurNEON.cpp',
             'platform/graphics/filters/arm/FEGaussianBlurNEON.h',
             'platform/graphics/filters/arm/FELightingNEON.cpp',
             'platform/graphics/filters/arm/FELightingNEON.h',

Modified: trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj (121899 => 121900)


--- trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj	2012-07-05 12:31:56 UTC (rev 121900)
@@ -31629,10 +31629,6 @@
 						>
 					</File>
 					<File
-						RelativePath="..\platform\graphics\filters\arm\FEGaussianBlurNEON.cpp"
-						>
-					</File>
-					<File
 						RelativePath="..\platform\graphics\filters\arm\FEGaussianBlurNEON.h"
 						>
 					</File>
@@ -31737,6 +31733,10 @@
 						>
 					</File>
 					<File
+						RelativePath="..\platform\graphics\filters\arm\NEONHelpers.h"
+						>
+					</File>
+					<File
 						RelativePath="..\platform\graphics\filters\PointLightSource.cpp"
 						>
 					</File>

Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (121899 => 121900)


--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj	2012-07-05 12:31:56 UTC (rev 121900)
@@ -1331,7 +1331,6 @@
 		49E912AD0EFAC906009D0CAF /* AnimationList.h in Headers */ = {isa = PBXBuildFile; fileRef = 49E912A80EFAC906009D0CAF /* AnimationList.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		49E912AE0EFAC906009D0CAF /* TimingFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 49E912A90EFAC906009D0CAF /* TimingFunction.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		49ECEB681499790D00CDD3A4 /* FECompositeArithmeticNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */; };
-		49ECEB691499790D00CDD3A4 /* FEGaussianBlurNEON.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */; };
 		49ECEB6A1499790D00CDD3A4 /* FEGaussianBlurNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */; };
 		49ECEB6B1499790D00CDD3A4 /* FELightingNEON.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49ECEB611499790D00CDD3A4 /* FELightingNEON.cpp */; };
 		49ECEB6C1499790D00CDD3A4 /* FELightingNEON.h in Headers */ = {isa = PBXBuildFile; fileRef = 49ECEB621499790D00CDD3A4 /* FELightingNEON.h */; };
@@ -8401,7 +8400,6 @@
 		49E912A80EFAC906009D0CAF /* AnimationList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AnimationList.h; path = animation/AnimationList.h; sourceTree = "<group>"; };
 		49E912A90EFAC906009D0CAF /* TimingFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TimingFunction.h; path = animation/TimingFunction.h; sourceTree = "<group>"; };
 		49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FECompositeArithmeticNEON.h; sourceTree = "<group>"; };
-		49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FEGaussianBlurNEON.cpp; sourceTree = "<group>"; };
 		49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FEGaussianBlurNEON.h; sourceTree = "<group>"; };
 		49ECEB611499790D00CDD3A4 /* FELightingNEON.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FELightingNEON.cpp; sourceTree = "<group>"; };
 		49ECEB621499790D00CDD3A4 /* FELightingNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FELightingNEON.h; sourceTree = "<group>"; };
@@ -10160,6 +10158,7 @@
 		91089D3014C335CD005AFC49 /* JSUint8ClampedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSUint8ClampedArray.h; sourceTree = "<group>"; };
 		91A3905814C0F4B900F67901 /* Uint8ClampedArray.idl */ = {isa = PBXFileReference; lastKnownFileType = text; name = Uint8ClampedArray.idl; path = canvas/Uint8ClampedArray.idl; sourceTree = "<group>"; };
 		91A3905A14C0F61100F67901 /* JSUint8ClampedArrayCustom.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSUint8ClampedArrayCustom.cpp; sourceTree = "<group>"; };
+		930062D7159B45B600ACD48A /* NEONHelpers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NEONHelpers.h; sourceTree = "<group>"; };
 		9302B0BC0D79F82900C7EE83 /* PageGroup.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PageGroup.cpp; sourceTree = "<group>"; };
 		9302B0BE0D79F82C00C7EE83 /* PageGroup.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PageGroup.h; sourceTree = "<group>"; };
 		9305B24C098F1B6B00C28855 /* Timer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Timer.h; sourceTree = "<group>"; };
@@ -15101,8 +15100,8 @@
 			isa = PBXGroup;
 			children = (
 				49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */,
-				49ECEB5F1499790D00CDD3A4 /* FEGaussianBlurNEON.cpp */,
 				49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */,
+				930062D7159B45B600ACD48A /* NEONHelpers.h */,
 				49ECEB611499790D00CDD3A4 /* FELightingNEON.cpp */,
 				49ECEB621499790D00CDD3A4 /* FELightingNEON.h */,
 			);
@@ -26229,7 +26228,6 @@
 				4358E8801360A31700E4748C /* FEDropShadow.cpp in Sources */,
 				84730D7E1248F0B300D3A9C9 /* FEFlood.cpp in Sources */,
 				84801954108BAFB300CB2B1F /* FEGaussianBlur.cpp in Sources */,
-				49ECEB691499790D00CDD3A4 /* FEGaussianBlurNEON.cpp in Sources */,
 				84730D801248F0B300D3A9C9 /* FELighting.cpp in Sources */,
 				49ECEB6B1499790D00CDD3A4 /* FELightingNEON.cpp in Sources */,
 				84730D821248F0B300D3A9C9 /* FEMerge.cpp in Sources */,

Modified: trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp (121899 => 121900)


--- trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp	2012-07-05 12:31:56 UTC (rev 121900)
@@ -120,13 +120,27 @@
     for (int i = 0; i < 3; ++i) {
         if (kernelSizeX) {
             kernelPosition(i, kernelSizeX, dxLeft, dxRight);
+#if CPU(ARM_NEON) && COMPILER(GCC)
+            if (!isAlphaImage())
+                boxBlurNEON(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height());
+            else
+                boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), true);
+#else
             boxBlur(src, dst, kernelSizeX, dxLeft, dxRight, 4, stride, paintSize.width(), paintSize.height(), isAlphaImage());
+#endif
             swap(src, dst);
         }
 
         if (kernelSizeY) {
             kernelPosition(i, kernelSizeY, dyLeft, dyRight);
+#if CPU(ARM_NEON) && COMPILER(GCC)
+            if (!isAlphaImage())
+                boxBlurNEON(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width());
+            else
+                boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), true);
+#else
             boxBlur(src, dst, kernelSizeY, dyLeft, dyRight, stride, 4, paintSize.height(), paintSize.width(), isAlphaImage());
+#endif
             swap(src, dst);
         }
     }
@@ -142,13 +156,8 @@
 void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters)
 {
     IntSize paintSize(parameters->width, parameters->height);
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
-    parameters->filter->platformApplyNeon(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
-        parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
-#else
     parameters->filter->platformApplyGeneric(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
         parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
-#endif
 }
 
 inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
@@ -221,11 +230,7 @@
     }
 
     // The selection here eventually should happen dynamically on some platforms.
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
-    platformApplyNeon(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize);
-#else
     platformApplyGeneric(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize);
-#endif
 }
 
 void FEGaussianBlur::calculateUnscaledKernelSize(unsigned& kernelSizeX, unsigned& kernelSizeY, float stdX, float stdY)

Modified: trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h (121899 => 121900)


--- trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h	2012-07-05 12:31:56 UTC (rev 121900)
@@ -73,7 +73,6 @@
     inline void platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
 
     inline void platformApplyGeneric(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
-    inline void platformApplyNeon(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
 #if USE(SKIA)
     virtual bool platformApplySkia();
 #endif

Deleted: trunk/Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.cpp (121899 => 121900)


--- trunk/Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.cpp	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.cpp	2012-07-05 12:31:56 UTC (rev 121900)
@@ -1,338 +0,0 @@
-/*
- * Copyright (C) 2011 University of Szeged
- * Copyright (C) 2011 Zoltan Herczeg
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "FEGaussianBlurNEON.h"
-
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
-
-#include <wtf/Alignment.h>
-
-namespace WebCore {
-
-static WTF_ALIGNED(unsigned char, s_FEGaussianBlurConstantsForNeon[], 16) = {
-    // Mapping from NEON to ARM registers.
-    0, 4,  8,  12, 16, 16, 16, 16
-};
-
-unsigned char* feGaussianBlurConstantsForNeon()
-{
-    return s_FEGaussianBlurConstantsForNeon;
-}
-
-#define ASSTRING(str) #str
-#define TOSTRING(value) ASSTRING(value)
-
-#define STRIDE_OFFSET TOSTRING(0)
-#define STRIDE_WIDTH_OFFSET TOSTRING(4)
-#define STRIDE_LINE_OFFSET TOSTRING(8)
-#define STRIDE_LINE_WIDTH_OFFSET TOSTRING(12)
-#define REMAINING_STRIDES_OFFSET TOSTRING(16)
-#define DISTANCE_LEFT_OFFSET TOSTRING(20)
-#define DISTANCE_RIGHT_OFFSET TOSTRING(24)
-#define INVERTED_KERNEL_SIZE_OFFSET TOSTRING(28)
-#define PAINTING_CONSTANTS_OFFSET TOSTRING(32)
-#define NL "\n"
-
-// Register allocation.
-#define SOURCE_R                "r0"
-#define DESTINATION_R           "r1"
-#define LEFT_R                  "r2"
-#define RIGHT_R                 "r3"
-#define SOURCE_END_R            "r4"
-#define DESTINATION_END_R       "r5"
-#define STRIDE_R                "r6"
-#define STRIDE_WIDTH_R          "r7"
-#define STRIDE_LINE_R           "r8"
-#define SOURCE_LINE_END_R       "r10"
-#define DISTANCE_LEFT_R         "r11"
-#define DISTANCE_RIGHT_R        "r12"
-#define MAX_KERNEL_SIZE_R       "lr"
-
-// Alternate names.
-#define INIT_INVERTED_KERNEL_SIZE_R SOURCE_END_R
-#define INIT_PAINTING_CONSTANTS_R DESTINATION_END_R
-#define INIT_SUM_R LEFT_R
-#define REMAINING_STRIDES_R SOURCE_LINE_END_R
-
-#define INVERTED_KERNEL_SIZE_Q  "q0"
-#define SUM_Q                   "q1"
-#define PIXEL_Q                 "q2"
-#define PIXEL_D0                "d4"
-#define PIXEL_D1                "d5"
-#define PIXEL_D00               "d4[0]"
-#define PIXEL_D01               "d4[1]"
-#define PIXEL_S1                "s9"
-#define PIXEL_D10               "d5[0]"
-#define PIXEL_S2                "s10"
-#define PIXEL_D11               "d5[1]"
-#define REMAINING_STRIDES_S0    "s12"
-
-#define REMAP_NEON_ARM_Q        "d16"
-
-asm ( // NOLINT
-".globl " TOSTRING(neonDrawAllChannelGaussianBlur) NL
-TOSTRING(neonDrawAllChannelGaussianBlur) ":" NL
-    "stmdb sp!, {r4-r8, r10, r11, lr}" NL
-    "ldr " STRIDE_R ", [r2, #" STRIDE_OFFSET "]" NL
-    "ldr " STRIDE_WIDTH_R ", [r2, #" STRIDE_WIDTH_OFFSET "]" NL
-    "ldr " DISTANCE_LEFT_R ", [r2, #" DISTANCE_LEFT_OFFSET "]" NL
-    "ldr " DISTANCE_RIGHT_R ", [r2, #" DISTANCE_RIGHT_OFFSET "]" NL
-    "ldr " STRIDE_LINE_R ", [r2, #" STRIDE_LINE_OFFSET "]" NL
-    "ldr " SOURCE_LINE_END_R ", [r2, #" STRIDE_LINE_WIDTH_OFFSET "]" NL
-    "ldr " INIT_INVERTED_KERNEL_SIZE_R ", [r2, #" INVERTED_KERNEL_SIZE_OFFSET "]" NL
-    "ldr " INIT_PAINTING_CONSTANTS_R ", [r2, #" PAINTING_CONSTANTS_OFFSET "]" NL
-
-    // Initialize locals.
-    "mul " DISTANCE_LEFT_R ", " DISTANCE_LEFT_R ", " STRIDE_R NL
-    "mul " DISTANCE_RIGHT_R ", " DISTANCE_RIGHT_R ", " STRIDE_R NL
-    "mov " MAX_KERNEL_SIZE_R ", " DISTANCE_RIGHT_R NL
-    "cmp " MAX_KERNEL_SIZE_R ", " STRIDE_WIDTH_R NL
-    "movcs " MAX_KERNEL_SIZE_R ", " STRIDE_WIDTH_R NL
-    "add " SOURCE_LINE_END_R ", " SOURCE_LINE_END_R ", " SOURCE_R NL
-    "vdup.f32 " INVERTED_KERNEL_SIZE_Q ", " INIT_INVERTED_KERNEL_SIZE_R NL
-    "vld1.f32 { " REMAP_NEON_ARM_Q " }, [" INIT_PAINTING_CONSTANTS_R "]!" NL
-
-".allChannelMainLoop:" NL
-
-    // Initialize the sum variable.
-    "vmov.u32 " SUM_Q ", #0" NL
-    "mov " INIT_SUM_R ", " SOURCE_R NL
-    "add " SOURCE_END_R ", " SOURCE_R ", " MAX_KERNEL_SIZE_R NL
-    "cmp " INIT_SUM_R ", " SOURCE_END_R NL
-    "bcs .allChannelInitSumDone" NL
-".allChannelInitSum:" NL
-    "vld1.u32 " PIXEL_D00 ", [" INIT_SUM_R "], " STRIDE_R NL
-    "vmovl.u8 " PIXEL_Q ", " PIXEL_D0 NL
-    "vmovl.u16 " PIXEL_Q ", " PIXEL_D0 NL
-    "vadd.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-    "cmp " INIT_SUM_R ", " SOURCE_END_R NL
-    "bcc .allChannelInitSum" NL
-".allChannelInitSumDone:" NL
-
-    // Blurring.
-    "add " SOURCE_END_R ", " SOURCE_R ", " STRIDE_WIDTH_R NL
-    "add " DESTINATION_END_R ", " DESTINATION_R ", " STRIDE_WIDTH_R NL
-    "sub " LEFT_R ", " SOURCE_R ", " DISTANCE_LEFT_R NL
-    "add " RIGHT_R ", " SOURCE_R ", " DISTANCE_RIGHT_R NL
-
-".allChannelBlur:" NL
-    "vcvt.f32.u32 " PIXEL_Q ", " SUM_Q NL
-    "vmul.f32 " PIXEL_Q ", " PIXEL_Q ", " INVERTED_KERNEL_SIZE_Q NL
-    "vcvt.u32.f32 " PIXEL_Q ", " PIXEL_Q NL
-    "vtbl.8 " PIXEL_D0 ", {" PIXEL_D0 "-" PIXEL_D1 "}, " REMAP_NEON_ARM_Q NL
-    "vst1.u32 " PIXEL_D00 ", [" DESTINATION_R "], " STRIDE_R NL
-
-    "cmp " LEFT_R ", " SOURCE_R NL
-    "bcc .allChannelSkipLeft" NL
-    "vld1.u32 " PIXEL_D00 ", [" LEFT_R "]" NL
-    "vmovl.u8 " PIXEL_Q ", " PIXEL_D0 NL
-    "vmovl.u16 " PIXEL_Q ", " PIXEL_D0 NL
-    "vsub.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-".allChannelSkipLeft: " NL
-
-    "cmp " RIGHT_R ", " SOURCE_END_R NL
-    "bcs .allChannelSkipRight" NL
-    "vld1.u32 " PIXEL_D00 ", [" RIGHT_R "]" NL
-    "vmovl.u8 " PIXEL_Q ", " PIXEL_D0 NL
-    "vmovl.u16 " PIXEL_Q ", " PIXEL_D0 NL
-    "vadd.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-".allChannelSkipRight: " NL
-
-    "add " LEFT_R ", " LEFT_R ", " STRIDE_R NL
-    "add " RIGHT_R ", " RIGHT_R ", " STRIDE_R NL
-    "cmp " DESTINATION_R ", " DESTINATION_END_R NL
-    "bcc .allChannelBlur" NL
-    "sub " DESTINATION_R ", " DESTINATION_R ", " STRIDE_WIDTH_R NL
-
-    "add " SOURCE_R ", " SOURCE_R ", " STRIDE_LINE_R NL
-    "add " DESTINATION_R ", " DESTINATION_R ", " STRIDE_LINE_R NL
-    "cmp " SOURCE_R ", " SOURCE_LINE_END_R NL
-    "bcc .allChannelMainLoop" NL
-
-    "ldmia sp!, {r4-r8, r10, r11, pc}" NL
-); // NOLINT
-
-#define DATA_TRANSFER4(command, base) \
-    command " " PIXEL_D00 ", [" base "], " STRIDE_LINE_R NL \
-    command " " PIXEL_D01 ", [" base "], " STRIDE_LINE_R NL \
-    command " " PIXEL_D10 ", [" base "], " STRIDE_LINE_R NL \
-    command " " PIXEL_D11 ", [" base "], " STRIDE_LINE_R NL \
-    "sub " base ", " base ", " STRIDE_LINE_R ", lsl #2" NL
-
-// The number of reads depend on REMAINING_STRIDES_R, but it is always >= 1 and <= 3
-#define CONDITIONAL_DATA_TRANSFER4(command1, command2, base) \
-    command1 " " PIXEL_D00 ", [" base "], " STRIDE_LINE_R NL \
-    "cmp " REMAINING_STRIDES_R ", #2" NL \
-    command2 "cs " PIXEL_S1 ", [" base "]" NL \
-    "add " base ", " base ", " STRIDE_LINE_R NL \
-    "cmp " REMAINING_STRIDES_R ", #3" NL \
-    command2 "cs " PIXEL_S2 ", [" base "]" NL \
-    "sub " base ", " base ", " STRIDE_LINE_R ", lsl #1" NL
-
-asm ( // NOLINT
-".globl " TOSTRING(neonDrawAlphaChannelGaussianBlur) NL
-TOSTRING(neonDrawAlphaChannelGaussianBlur) ":" NL
-    "stmdb sp!, {r4-r8, r10, r11, lr}" NL
-    "ldr " STRIDE_R ", [r2, #" STRIDE_OFFSET "]" NL
-    "ldr " STRIDE_WIDTH_R ", [r2, #" STRIDE_WIDTH_OFFSET "]" NL
-    "ldr " DISTANCE_LEFT_R ", [r2, #" DISTANCE_LEFT_OFFSET "]" NL
-    "ldr " DISTANCE_RIGHT_R ", [r2, #" DISTANCE_RIGHT_OFFSET "]" NL
-    "ldr " STRIDE_LINE_R ", [r2, #" STRIDE_LINE_OFFSET "]" NL
-    "ldr " SOURCE_LINE_END_R ", [r2, #" STRIDE_LINE_WIDTH_OFFSET "]" NL
-    "ldr " INIT_INVERTED_KERNEL_SIZE_R ", [r2, #" INVERTED_KERNEL_SIZE_OFFSET "]" NL
-    "vldr.u32 " REMAINING_STRIDES_S0 ", [r2, #" REMAINING_STRIDES_OFFSET "]" NL
-
-    // Initialize locals.
-    "mul " DISTANCE_LEFT_R ", " DISTANCE_LEFT_R ", " STRIDE_R NL
-    "mul " DISTANCE_RIGHT_R ", " DISTANCE_RIGHT_R ", " STRIDE_R NL
-    "mov " MAX_KERNEL_SIZE_R ", " DISTANCE_RIGHT_R NL
-    "cmp " MAX_KERNEL_SIZE_R ", " STRIDE_WIDTH_R NL
-    "movcs " MAX_KERNEL_SIZE_R ", " STRIDE_WIDTH_R NL
-    "add " SOURCE_LINE_END_R ", " SOURCE_LINE_END_R ", " SOURCE_R NL
-    "vdup.f32 " INVERTED_KERNEL_SIZE_Q ", " INIT_INVERTED_KERNEL_SIZE_R NL
-    "cmp " SOURCE_LINE_END_R ", " SOURCE_R NL
-    "beq .alphaChannelEarlyLeave" NL
-
-    // Processing 4 strides parallelly.
-
-".alphaChannelMainLoop:" NL
-
-    // Initialize the sum variable.
-    "vmov.u32 " SUM_Q ", #0" NL
-    "mov " INIT_SUM_R ", " SOURCE_R NL
-    "add " SOURCE_END_R ", " SOURCE_R ", " MAX_KERNEL_SIZE_R NL
-    "cmp " INIT_SUM_R ", " SOURCE_END_R NL
-    "bcs .alphaChannelInitSumDone" NL
-".alphaChannelInitSum:" NL
-    DATA_TRANSFER4("vld1.u32", INIT_SUM_R)
-    "vshr.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    "vadd.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-    "add " INIT_SUM_R ", " INIT_SUM_R ", " STRIDE_R NL
-    "cmp " INIT_SUM_R ", " SOURCE_END_R NL
-    "bcc .alphaChannelInitSum" NL
-".alphaChannelInitSumDone:" NL
-
-    // Blurring.
-    "add " SOURCE_END_R ", " SOURCE_R ", " STRIDE_WIDTH_R NL
-    "add " DESTINATION_END_R ", " DESTINATION_R ", " STRIDE_WIDTH_R NL
-    "sub " LEFT_R ", " SOURCE_R ", " DISTANCE_LEFT_R NL
-    "add " RIGHT_R ", " SOURCE_R ", " DISTANCE_RIGHT_R NL
-
-".alphaChannelBlur:" NL
-    "vcvt.f32.u32 " PIXEL_Q ", " SUM_Q NL
-    "vmul.f32 " PIXEL_Q ", " PIXEL_Q ", " INVERTED_KERNEL_SIZE_Q NL
-    "vcvt.u32.f32 " PIXEL_Q ", " PIXEL_Q NL
-    "vshl.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    DATA_TRANSFER4("vst1.u32", DESTINATION_R)
-
-    "cmp " LEFT_R ", " SOURCE_R NL
-    "bcc .alphaChannelSkipLeft" NL
-    DATA_TRANSFER4("vld1.u32", LEFT_R)
-    "vshr.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    "vsub.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-".alphaChannelSkipLeft: " NL
-
-    "cmp " RIGHT_R ", " SOURCE_END_R NL
-    "bcs .alphaChannelSkipRight" NL
-    DATA_TRANSFER4("vld1.u32", RIGHT_R)
-    "vshr.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    "vadd.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-".alphaChannelSkipRight: " NL
-
-    "add " DESTINATION_R ", " DESTINATION_R ", " STRIDE_R NL
-    "add " LEFT_R ", " LEFT_R ", " STRIDE_R NL
-    "add " RIGHT_R ", " RIGHT_R ", " STRIDE_R NL
-    "cmp " DESTINATION_R ", " DESTINATION_END_R NL
-    "bcc .alphaChannelBlur" NL
-    "sub " DESTINATION_R ", " DESTINATION_R ", " STRIDE_WIDTH_R NL
-
-    "add " SOURCE_R ", " SOURCE_R ", " STRIDE_LINE_R ", lsl #2" NL
-    "add " DESTINATION_R ", " DESTINATION_R ", " STRIDE_LINE_R ", lsl #2" NL
-    "cmp " SOURCE_R ", " SOURCE_LINE_END_R NL
-    "bcc .alphaChannelMainLoop" NL
-
-    // Processing the remaining strides (0 - 3).
-".alphaChannelEarlyLeave:" NL
-    "vmov.u32 " REMAINING_STRIDES_R ", " REMAINING_STRIDES_S0 NL
-    // Early return for 0 strides.
-    "cmp " REMAINING_STRIDES_R ", #0" NL
-    "ldmeqia sp!, {r4-r8, r10, r11, pc}" NL
-
-    // Initialize the sum variable.
-    "vmov.u32 " SUM_Q ", #0" NL
-    "mov " INIT_SUM_R ", " SOURCE_R NL
-    "add " SOURCE_END_R ", " SOURCE_R ", " MAX_KERNEL_SIZE_R NL
-    "cmp " INIT_SUM_R ", " SOURCE_END_R NL
-    "bcs .alphaChannelSecondInitSumDone" NL
-".alphaChannelSecondInitSum:" NL
-    CONDITIONAL_DATA_TRANSFER4("vld1.u32", "vldr", INIT_SUM_R)
-    "vshr.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    "vadd.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-    "add " INIT_SUM_R ", " INIT_SUM_R ", " STRIDE_R NL
-    "cmp " INIT_SUM_R ", " SOURCE_END_R NL
-    "bcc .alphaChannelSecondInitSum" NL
-".alphaChannelSecondInitSumDone:" NL
-
-    // Blurring.
-    "add " SOURCE_END_R ", " SOURCE_R ", " STRIDE_WIDTH_R NL
-    "add " DESTINATION_END_R ", " DESTINATION_R ", " STRIDE_WIDTH_R NL
-    "sub " LEFT_R ", " SOURCE_R ", " DISTANCE_LEFT_R NL
-    "add " RIGHT_R ", " SOURCE_R ", " DISTANCE_RIGHT_R NL
-
-".alphaChannelSecondBlur:" NL
-    "vcvt.f32.u32 " PIXEL_Q ", " SUM_Q NL
-    "vmul.f32 " PIXEL_Q ", " PIXEL_Q ", " INVERTED_KERNEL_SIZE_Q NL
-    "vcvt.u32.f32 " PIXEL_Q ", " PIXEL_Q NL
-    "vshl.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    CONDITIONAL_DATA_TRANSFER4("vst1.u32", "vstr", DESTINATION_R)
-
-    "cmp " LEFT_R ", " SOURCE_R NL
-    "bcc .alphaChannelSecondSkipLeft" NL
-    CONDITIONAL_DATA_TRANSFER4("vld1.u32", "vldr", LEFT_R)
-    "vshr.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    "vsub.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-".alphaChannelSecondSkipLeft: " NL
-
-    "cmp " RIGHT_R ", " SOURCE_END_R NL
-    "bcs .alphaChannelSecondSkipRight" NL
-    CONDITIONAL_DATA_TRANSFER4("vld1.u32", "vldr", RIGHT_R)
-    "vshr.u32 " PIXEL_Q ", " PIXEL_Q ", #24" NL
-    "vadd.u32 " SUM_Q ", " SUM_Q ", " PIXEL_Q NL
-".alphaChannelSecondSkipRight: " NL
-
-    "add " DESTINATION_R ", " DESTINATION_R ", " STRIDE_R NL
-    "add " LEFT_R ", " LEFT_R ", " STRIDE_R NL
-    "add " RIGHT_R ", " RIGHT_R ", " STRIDE_R NL
-    "cmp " DESTINATION_R ", " DESTINATION_END_R NL
-    "bcc .alphaChannelSecondBlur" NL
-
-    "ldmia sp!, {r4-r8, r10, r11, pc}" NL
-); // NOLINT
-
-} // namespace WebCore
-
-#endif // CPU(ARM_NEON) && COMPILER(GCC)

Modified: trunk/Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h (121899 => 121900)


--- trunk/Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h	2012-07-05 11:21:47 UTC (rev 121899)
+++ trunk/Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h	2012-07-05 12:31:56 UTC (rev 121900)
@@ -29,82 +29,57 @@
 
 #include <wtf/Platform.h>
 
-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC)
+#if CPU(ARM_NEON) && COMPILER(GCC)
 
 #include "FEGaussianBlur.h"
+#include "NEONHelpers.h"
 
 namespace WebCore {
 
-struct FEGaussianBlurPaintingDataForNeon {
-    int stride;
-    int strideWidth;
-    int strideLine;
-    int strideLineWidth;
-    int remainingStrides;
-    int distanceLeft;
-    int distanceRight;
-    float invertedKernelSize;
-    unsigned char* paintingConstants;
-};
-
-unsigned char* feGaussianBlurConstantsForNeon();
-
-extern "C" {
-void neonDrawAllChannelGaussianBlur(unsigned char* source, unsigned char* destination, FEGaussianBlurPaintingDataForNeon*);
-void neonDrawAlphaChannelGaussianBlur(unsigned char* source, unsigned char* destination, FEGaussianBlurPaintingDataForNeon*);
-}
-
-inline void FEGaussianBlur::platformApplyNeon(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
+// Applies one box blur pass from srcPixelArray to dstPixelArray with NEON intrinsics,
+// handling the four 8-bit channels of each pixel together as one float32x4_t.
+// dx is the kernel size: every output pixel is the running sum multiplied by 1 / dx,
+// so dx is expected to be non-zero. dxLeft and dxRight are the distances, in pixels,
+// from the current pixel to the trailing and leading edges of the kernel.
+// stride is the byte distance between consecutive pixels along the blur direction and
+// strideLine the byte distance between consecutive lines; both are divided by 4
+// because the buffers are addressed as 32-bit pixels.
+// effectWidth is the number of pixels per line, effectHeight the number of lines.
+inline void boxBlurNEON(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* dstPixelArray,
+                    unsigned dx, int dxLeft, int dxRight, int stride, int strideLine, int effectWidth, int effectHeight)
 {
-    const int widthMultipliedByFour = 4 * paintSize.width();
-    FEGaussianBlurPaintingDataForNeon argumentsX = {
-        4,
-        widthMultipliedByFour,
-        widthMultipliedByFour,
-        (isAlphaImage() ? ((paintSize.height() >> 2) << 2) : paintSize.height()) * widthMultipliedByFour,
-        isAlphaImage() ? (paintSize.height() & 0x3) : 0,
-        0,
-        0,
-        0,
-        isAlphaImage() ? 0 : feGaussianBlurConstantsForNeon()
-    };
-    FEGaussianBlurPaintingDataForNeon argumentsY = {
-        widthMultipliedByFour,
-        widthMultipliedByFour * paintSize.height(),
-        4,
-        (isAlphaImage() ? ((paintSize.width() >> 2) << 2) : paintSize.width()) * 4,
-        isAlphaImage() ? (paintSize.width() & 0x3) : 0,
-        0,
-        0,
-        0,
-        isAlphaImage() ? 0 : feGaussianBlurConstantsForNeon()
-    };
+    uint32_t* sourcePixel = reinterpret_cast<uint32_t*>(srcPixelArray->data());
+    uint32_t* destinationPixel = reinterpret_cast<uint32_t*>(dstPixelArray->data());
 
-    for (int i = 0; i < 3; ++i) {
-        if (kernelSizeX) {
-            kernelPosition(i, kernelSizeX, argumentsX.distanceLeft, argumentsX.distanceRight);
-            argumentsX.invertedKernelSize = 1 / static_cast<float>(kernelSizeX);
-            if (isAlphaImage())
-                neonDrawAlphaChannelGaussianBlur(srcPixelArray->data(), tmpPixelArray->data(), &argumentsX);
-            else
-                neonDrawAllChannelGaussianBlur(srcPixelArray->data(), tmpPixelArray->data(), &argumentsX);
-        } else {
-            Uint8ClampedArray* auxPixelArray = tmpPixelArray;
-            tmpPixelArray = srcPixelArray;
-            srcPixelArray = auxPixelArray;
+    float32x4_t deltaX = vdupq_n_f32(1.0 / dx);
+    int pixelLine = strideLine / 4;
+    int pixelStride = stride / 4;
+
+    for (int y = 0; y < effectHeight; ++y) {
+        int line = y * pixelLine;
+        float32x4_t sum = vdupq_n_f32(0);
+        // Fill the kernel
+        int maxKernelSize = std::min(dxRight, effectWidth);
+        for (int i = 0; i < maxKernelSize; ++i) {
+            float32x4_t sourcePixelAsFloat = loadRGBA8AsFloat(sourcePixel + line + i * pixelStride);
+            sum = vaddq_f32(sum, sourcePixelAsFloat);
         }
 
-        if (kernelSizeY) {
-            kernelPosition(i, kernelSizeY, argumentsY.distanceLeft, argumentsY.distanceRight);
-            argumentsY.invertedKernelSize = 1 / static_cast<float>(kernelSizeY);
-            if (isAlphaImage())
-                neonDrawAlphaChannelGaussianBlur(tmpPixelArray->data(), srcPixelArray->data(), &argumentsY);
-            else
-                neonDrawAllChannelGaussianBlur(tmpPixelArray->data(), srcPixelArray->data(), &argumentsY);
-        } else {
-            Uint8ClampedArray* auxPixelArray = tmpPixelArray;
-            tmpPixelArray = srcPixelArray;
-            srcPixelArray = auxPixelArray;
+        // Blurring
+        for (int x = 0; x < effectWidth; ++x) {
+            int pixelOffset = line + x * pixelStride;
+            float32x4_t result = vmulq_f32(sum, deltaX);
+            storeFloatAsRGBA8(result, destinationPixel + pixelOffset);
+            if (x >= dxLeft) {
+                // Drop the pixel that leaves the kernel on its trailing edge.
+                float32x4_t sourcePixelAsFloat = loadRGBA8AsFloat(sourcePixel + pixelOffset - dxLeft * pixelStride);
+                sum = vsubq_f32(sum, sourcePixelAsFloat);
+            }
+            if (x + dxRight < effectWidth) {
+                // Add the pixel that enters the kernel on its leading edge.
+                float32x4_t sourcePixelAsFloat = loadRGBA8AsFloat(sourcePixel + pixelOffset + dxRight * pixelStride);
+                sum = vaddq_f32(sum, sourcePixelAsFloat);
+            }
         }
     }
 }

Added: trunk/Source/WebCore/platform/graphics/filters/arm/NEONHelpers.h (0 => 121900)


--- trunk/Source/WebCore/platform/graphics/filters/arm/NEONHelpers.h	                        (rev 0)
+++ trunk/Source/WebCore/platform/graphics/filters/arm/NEONHelpers.h	2012-07-05 12:31:56 UTC (rev 121900)
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2012 University of Szeged
+ * Copyright (C) 2012 Gabor Rapcsanyi
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef NEONHelpers_h
+#define NEONHelpers_h
+
+#include <wtf/Platform.h>
+
+#if CPU(ARM_NEON) && COMPILER(GCC)
+
+#include <arm_neon.h>
+
+namespace WebCore {
+
+// Loads one RGBA8 pixel from source and returns its four 8-bit channels widened to
+// four floats, one channel per lane.
+inline float32x4_t loadRGBA8AsFloat(uint32_t* source)
+{
+    // Start from a zeroed vector: vset_lane_u32 reads its vector argument, so passing
+    // temporary1 to its own initializer would read an uninitialized value.
+    uint32x2_t temporary1 = vset_lane_u32(*source, vdup_n_u32(0), 0);
+    uint16x4_t temporary2 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(temporary1)));
+    return vcvtq_f32_u32(vmovl_u16(temporary2));
+}
+
+// Converts the four float lanes of data to unsigned integers, narrows each one to
+// 8 bits and stores them as one RGBA8 pixel at destination. The narrowing (vmovn) does
+// not saturate, so callers are expected to pass lanes already within 0..255.
+inline void storeFloatAsRGBA8(float32x4_t data, uint32_t* destination)
+{
+    uint16x4_t temporary1 = vmovn_u32(vcvtq_u32_f32(data));
+    uint8x8_t temporary2 = vmovn_u16(vcombine_u16(temporary1, temporary1));
+    *destination = vget_lane_u32(vreinterpret_u32_u8(temporary2), 0);
+}
+
+} // namespace WebCore
+
+#endif // CPU(ARM_NEON) && COMPILER(GCC)
+
+#endif // NEONHelpers_h
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to