Title: [134378] trunk/Source/WebCore
Revision
134378
Author
rga...@webkit.org
Date
2012-11-13 00:33:08 -0800 (Tue, 13 Nov 2012)

Log Message

Optimize RGBA4444ToRGBA8 packing/unpacking functions with NEON intrinsics in GraphicsContext3D
https://bugs.webkit.org/show_bug.cgi?id=101473

Reviewed by Zoltan Herczeg.

With NEON intrinsics the packing/unpacking functions can be optimized well.
This particular function is about 3 times faster with ARM NEON. In top-level tests
the speedup was 1.18x.

* CMakeLists.txt:
* GNUmakefile.am:
* GNUmakefile.list.am:
* Target.pri:
* WebCore.gyp/WebCore.gyp:
* WebCore.gypi:
* WebCore.pri:
* WebCore.xcodeproj/project.pbxproj:
* platform/graphics/GraphicsContext3D.cpp:
(WebCore):
* platform/graphics/cpu/arm/GraphicsContext3DNEON.h: Added.
(WebCore):
(ARM):
(WebCore::ARM::unpackOneRowOfRGBA4444ToRGBA8NEON):
(WebCore::ARM::packOneRowOfRGBA8ToUnsignedShort4444NEON):

Modified Paths

Added Paths

Diff

Modified: trunk/Source/WebCore/CMakeLists.txt (134377 => 134378)


--- trunk/Source/WebCore/CMakeLists.txt	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/CMakeLists.txt	2012-11-13 08:33:08 UTC (rev 134378)
@@ -48,6 +48,7 @@
     "${WEBCORE_DIR}/platform/animation"
     "${WEBCORE_DIR}/platform/audio"
     "${WEBCORE_DIR}/platform/graphics"
+    "${WEBCORE_DIR}/platform/graphics/cpu/arm"
     "${WEBCORE_DIR}/platform/graphics/filters"
     "${WEBCORE_DIR}/platform/graphics/filters/arm"
     "${WEBCORE_DIR}/platform/graphics/harfbuzz"

Modified: trunk/Source/WebCore/ChangeLog (134377 => 134378)


--- trunk/Source/WebCore/ChangeLog	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/ChangeLog	2012-11-13 08:33:08 UTC (rev 134378)
@@ -1,3 +1,30 @@
+2012-11-13  Gabor Rapcsanyi  <rga...@webkit.org>
+
+        Optimize RGBA4444ToRGBA8 packing/unpacking functions with NEON intrinsics in GraphicsContext3D
+        https://bugs.webkit.org/show_bug.cgi?id=101473
+
+        Reviewed by Zoltan Herczeg.
+
+        With NEON intrinsics the packing/unpacking functions can be optimized well.
+        This particular function is about 3 times faster with ARM NEON. In top-level tests
+        the speedup was 1.18x.
+
+        * CMakeLists.txt:
+        * GNUmakefile.am:
+        * GNUmakefile.list.am:
+        * Target.pri:
+        * WebCore.gyp/WebCore.gyp:
+        * WebCore.gypi:
+        * WebCore.pri:
+        * WebCore.xcodeproj/project.pbxproj:
+        * platform/graphics/GraphicsContext3D.cpp:
+        (WebCore):
+        * platform/graphics/cpu/arm/GraphicsContext3DNEON.h: Added.
+        (WebCore):
+        (ARM):
+        (WebCore::ARM::unpackOneRowOfRGBA4444ToRGBA8NEON):
+        (WebCore::ARM::packOneRowOfRGBA8ToUnsignedShort4444NEON):
+
 2012-11-13  Takashi Sakamoto  <ta...@google.com>
 
         Crash when replacing parts of text inputs with content: url(...)

Modified: trunk/Source/WebCore/GNUmakefile.am (134377 => 134378)


--- trunk/Source/WebCore/GNUmakefile.am	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/GNUmakefile.am	2012-11-13 08:33:08 UTC (rev 134378)
@@ -54,6 +54,7 @@
 	-I$(srcdir)/Source/WebCore/platform/animation \
 	-I$(srcdir)/Source/WebCore/platform/audio \
 	-I$(srcdir)/Source/WebCore/platform/graphics \
+	-I$(srcdir)/Source/WebCore/platform/graphics/cpu/arm \
 	-I$(srcdir)/Source/WebCore/platform/graphics/filters \
 	-I$(srcdir)/Source/WebCore/platform/graphics/filters/arm \
 	-I$(srcdir)/Source/WebCore/platform/graphics/gpu \

Modified: trunk/Source/WebCore/GNUmakefile.list.am (134377 => 134378)


--- trunk/Source/WebCore/GNUmakefile.list.am	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/GNUmakefile.list.am	2012-11-13 08:33:08 UTC (rev 134378)
@@ -4270,6 +4270,7 @@
 	Source/WebCore/platform/HistogramSupport.h \
 	Source/WebCore/platform/graphics/ANGLEWebKitBridge.cpp \
 	Source/WebCore/platform/graphics/ANGLEWebKitBridge.h \
+	Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h \
 	Source/WebCore/platform/graphics/BitmapImage.cpp \
 	Source/WebCore/platform/graphics/BitmapImage.h \
 	Source/WebCore/platform/graphics/Color.cpp \

Modified: trunk/Source/WebCore/Target.pri (134377 => 134378)


--- trunk/Source/WebCore/Target.pri	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/Target.pri	2012-11-13 08:33:08 UTC (rev 134378)
@@ -3956,6 +3956,7 @@
 
 use?(3D_GRAPHICS) {
     HEADERS += \
+        platform/graphics/cpu/arm/GraphicsContext3DNEON.h \
         platform/graphics/ANGLEWebKitBridge.h \
         platform/graphics/Extensions3D.h \
         platform/graphics/GraphicsContext3D.h \

Modified: trunk/Source/WebCore/WebCore.gyp/WebCore.gyp (134377 => 134378)


--- trunk/Source/WebCore/WebCore.gyp/WebCore.gyp	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/WebCore.gyp/WebCore.gyp	2012-11-13 08:33:08 UTC (rev 134378)
@@ -111,6 +111,7 @@
       '../platform/graphics',
       '../platform/graphics/chromium',
       '../platform/graphics/chromium/cc',
+      '../platform/graphics/cpu/arm',
       '../platform/graphics/filters',
       '../platform/graphics/filters/arm',
       '../platform/graphics/filters/skia',

Modified: trunk/Source/WebCore/WebCore.gypi (134377 => 134378)


--- trunk/Source/WebCore/WebCore.gypi	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/WebCore.gypi	2012-11-13 08:33:08 UTC (rev 134378)
@@ -414,6 +414,7 @@
             'platform/graphics/TextRenderingMode.h',
             'platform/graphics/TextRun.h',
             'platform/graphics/TypesettingFeatures.h',
+            'platform/graphics/cpu/arm/GraphicsContext3DNEON.h',
             'platform/graphics/cg/ImageBufferDataCG.h',
             'platform/graphics/mac/ColorMac.h',
             'platform/graphics/mac/MediaPlayerProxy.h',

Modified: trunk/Source/WebCore/WebCore.pri (134377 => 134378)


--- trunk/Source/WebCore/WebCore.pri	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/WebCore.pri	2012-11-13 08:33:08 UTC (rev 134378)
@@ -53,6 +53,7 @@
     $$SOURCE_DIR/platform/animation \
     $$SOURCE_DIR/platform/audio \
     $$SOURCE_DIR/platform/graphics \
+    $$SOURCE_DIR/platform/graphics/cpu/arm \
     $$SOURCE_DIR/platform/graphics/filters \
     $$SOURCE_DIR/platform/graphics/filters/arm \
     $$SOURCE_DIR/platform/graphics/opengl \

Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (134377 => 134378)


--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj	2012-11-13 08:33:08 UTC (rev 134378)
@@ -10473,6 +10473,7 @@
 		93309E9F099EB78C0056E581 /* SharedTimerMac.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = SharedTimerMac.mm; sourceTree = "<group>"; };
 		93309EA0099EB78C0056E581 /* SharedTimer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SharedTimer.h; sourceTree = "<group>"; };
 		93309EA1099EB78C0056E581 /* Timer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Timer.cpp; sourceTree = "<group>"; };
+		9332AB3D16515D7700D827EC /* GraphicsContext3DNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GraphicsContext3DNEON.h; sourceTree = "<group>"; };
 		93354A3B0B24F8C9003F6DEA /* UIEventWithKeyState.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UIEventWithKeyState.cpp; sourceTree = "<group>"; };
 		933A142B0B7D188600A53FFD /* TextEvent.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TextEvent.cpp; sourceTree = "<group>"; };
 		933A142C0B7D188600A53FFD /* TextEvent.idl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = TextEvent.idl; sourceTree = "<group>"; };
@@ -17365,6 +17366,22 @@
 			tabWidth = 4;
 			usesTabs = 0;
 		};
+		9332AB3B16515D7700D827EC /* cpu */ = {
+			isa = PBXGroup;
+			children = (
+				9332AB3C16515D7700D827EC /* arm */,
+			);
+			path = cpu;
+			sourceTree = "<group>";
+		};
+		9332AB3C16515D7700D827EC /* arm */ = {
+			isa = PBXGroup;
+			children = (
+				9332AB3D16515D7700D827EC /* GraphicsContext3DNEON.h */,
+			);
+			path = arm;
+			sourceTree = "<group>";
+		};
 		9363B6290F8E8FE000803810 /* cf */ = {
 			isa = PBXGroup;
 			children = (
@@ -19931,6 +19948,7 @@
 		B2A015910AF6CD53006BCE0E /* graphics */ = {
 			isa = PBXGroup;
 			children = (
+				9332AB3B16515D7700D827EC /* cpu */,
 				076F0D0812B8192700C26AA4 /* avfoundation */,
 				499B3EC0128CCC1800E726C2 /* ca */,
 				B27535290B053814002CE64F /* cg */,

Modified: trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp (134377 => 134378)


--- trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp	2012-11-13 08:23:48 UTC (rev 134377)
+++ trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp	2012-11-13 08:33:08 UTC (rev 134378)
@@ -29,6 +29,7 @@
 #if USE(3D_GRAPHICS)
 
 #include "GraphicsContext3D.h"
+#include "GraphicsContext3DNEON.h"
 
 #include "CheckedInt.h"
 #include "DrawingBuffer.h"
@@ -541,6 +542,16 @@
 
 void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
 {
+#if HAVE(ARM_NEON_INTRINSICS)
+    unsigned tailPixels = pixelsPerRow % 8;
+    unsigned pixelSize = pixelsPerRow - tailPixels;
+
+    ARM::unpackOneRowOfRGBA4444ToRGBA8NEON(source, destination, pixelSize);
+
+    source += pixelSize;
+    destination += pixelSize * 4;
+    pixelsPerRow = tailPixels;
+#endif
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         uint16_t packedValue = source[0];
         uint8_t r = packedValue >> 12;
@@ -947,6 +958,17 @@
 
 void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t* source, uint16_t* destination, unsigned int pixelsPerRow)
 {
+#if HAVE(ARM_NEON_INTRINSICS)
+    unsigned componentsPerRow = pixelsPerRow * 4;
+    unsigned tailComponents = componentsPerRow % 32;
+    unsigned componentsSize = componentsPerRow - tailComponents;
+
+    ARM::packOneRowOfRGBA8ToUnsignedShort4444NEON(source, destination, componentsSize);
+
+    source += componentsSize;
+    destination += componentsSize / 4;
+    pixelsPerRow = tailComponents / 4;
+#endif
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         *destination = (((source[0] & 0xF0) << 8)
                         | ((source[1] & 0xF0) << 4)

Added: trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h (0 => 134378)


--- trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h	                        (rev 0)
+++ trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h	2012-11-13 08:33:08 UTC (rev 134378)
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2012 Gabor Rapcsanyi (rga...@inf.u-szeged.hu), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef GraphicsContext3DNEON_h
+#define GraphicsContext3DNEON_h
+
+#if HAVE(ARM_NEON_INTRINSICS)
+
+#include <arm_neon.h>
+
+namespace WebCore {
+
+namespace ARM {
+
+ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8NEON(const uint16_t* source, uint8_t* destination, unsigned pixelSize)
+{ // Expands 16-bit packed RGBA4444 pixels from source into four bytes per pixel (R, G, B, A) at destination; pixelSize is the pixel count.
+    uint16x8_t constant = vdupq_n_u16(0x0F); // Low-nibble mask replicated into all eight 16-bit lanes.
+    for (unsigned i = 0; i < pixelSize; i += 8) { // Every iteration handles a full group of 8 pixels; the caller in GraphicsContext3D.cpp passes a multiple of 8.
+        uint16x8_t eightPixels = vld1q_u16(source + i); // Load 8 packed pixels.
+
+        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12)); // Bits 15..12 of each pixel, narrowed to 8-bit lanes.
+        uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), constant)); // Bits 11..8.
+        uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), constant)); // Bits 7..4.
+        uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, constant)); // Bits 3..0.
+
+        componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR); // Duplicate the nibble into the high half of the byte: 0x0N -> 0xNN.
+        componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG);
+        componentB = vorr_u8(vshl_n_u8(componentB, 4), componentB);
+        componentA = vorr_u8(vshl_n_u8(componentA, 4), componentA);
+
+        uint8x8x4_t destComponents = {componentR, componentG, componentB, componentA};
+        vst4_u8(destination, destComponents); // Interleaving store: R0 G0 B0 A0 R1 G1 B1 A1 ... (32 bytes).
+        destination += 32; // 8 pixels * 4 bytes were written.
+    }
+}
+
+ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444NEON(const uint8_t* source, uint16_t* destination, unsigned componentsSize)
+{ // Packs 8-bit R, G, B, A bytes from source into two bytes per pixel at destination; componentsSize counts bytes (4 per pixel), not pixels.
+    uint8_t* dst = reinterpret_cast<uint8_t*>(destination); // The output is written bytewise through this view of destination.
+    uint8x8_t constant = vdup_n_u8(0xF0); // High-nibble mask replicated into all eight 8-bit lanes.
+    for (unsigned i = 0; i < componentsSize; i += 32) { // Every iteration handles a full group of 32 bytes (8 pixels); the caller in GraphicsContext3D.cpp passes a multiple of 32.
+        uint8x8x4_t RGBA8 = vld4_u8(source + i); // De-interleaving load: val[0..3] hold the R, G, B, A bytes of 8 pixels.
+
+        uint8x8_t componentR = vand_u8(RGBA8.val[0], constant); // Keep the high nibble in place.
+        uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], constant), 4); // Move the high nibble down to the low nibble.
+        uint8x8_t componentB = vand_u8(RGBA8.val[2], constant);
+        uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], constant), 4);
+
+        uint8x8x2_t RGBA4;
+        RGBA4.val[0] = vorr_u8(componentB, componentA); // Byte holding B (high nibble) and A (low nibble).
+        RGBA4.val[1] = vorr_u8(componentR, componentG); // Byte holding R (high nibble) and G (low nibble).
+        vst2_u8(dst, RGBA4); // Interleaving store: BA0 RG0 BA1 RG1 ... (16 bytes). NOTE(review): presumably relies on little-endian layout so RG ends up as the high byte of each uint16_t - confirm.
+        dst += 16; // 8 pixels * 2 bytes were written.
+    }
+}
+
+} // namespace ARM
+
+} // namespace WebCore
+
+#endif // HAVE(ARM_NEON_INTRINSICS)
+
+#endif // GraphicsContext3DNEON_h
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to