Title: [102312] trunk/Source/WebCore
- Revision
- 102312
- Author
- [email protected]
- Date
- 2011-12-07 21:23:42 -0800 (Wed, 07 Dec 2011)
Log Message
Implement the SSE optimization in SincResampler::process()
https://bugs.webkit.org/show_bug.cgi?id=73789
Patch by Xingnan Wang <[email protected]> on 2011-12-07
Reviewed by Benjamin Poulain.
This gives about a 70% performance improvement in the sample-convolution hot spot.
* platform/audio/SincResampler.cpp:
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (102311 => 102312)
--- trunk/Source/WebCore/ChangeLog 2011-12-08 05:08:32 UTC (rev 102311)
+++ trunk/Source/WebCore/ChangeLog 2011-12-08 05:23:42 UTC (rev 102312)
@@ -1,3 +1,14 @@
+2011-12-07 Xingnan Wang <[email protected]>
+
+ Implement the SSE optimization in SincResampler::process()
+ https://bugs.webkit.org/show_bug.cgi?id=73789
+
+ Reviewed by Benjamin Poulain.
+
+ This gives about a 70% performance improvement in the sample-convolution hot spot.
+
+ * platform/audio/SincResampler.cpp:
+
2011-12-07 Luke Macpherson <[email protected]>
Implement border image source properties in CSSStyleApplyProperty.
Modified: trunk/Source/WebCore/platform/audio/SincResampler.cpp (102311 => 102312)
--- trunk/Source/WebCore/platform/audio/SincResampler.cpp 2011-12-08 05:08:32 UTC (rev 102311)
+++ trunk/Source/WebCore/platform/audio/SincResampler.cpp 2011-12-08 05:23:42 UTC (rev 102312)
@@ -35,6 +35,10 @@
#include "AudioBus.h"
#include <wtf/MathExtras.h>
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
using namespace std;
// Input buffer layout, dividing the total buffer into regions (r0 - r5):
@@ -246,8 +250,6 @@
// Generate a single output sample.
int n = m_kernelSize;
- // FIXME: add SIMD optimizations for the following. The scalar code-path can probably also be optimized better.
-
#define CONVOLVE_ONE_SAMPLE \
input = *inputP++; \
sum1 += input * *k1; \
@@ -257,6 +259,76 @@
{
float input;
+
+#ifdef __SSE2__
+ // If the inputP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
+ while ((reinterpret_cast<uintptr_t>(inputP) & 0x0F) && n) {
+ CONVOLVE_ONE_SAMPLE
+ n--;
+ }
+
+ // Now inputP is aligned, so start applying SSE.
+ float* endP = inputP + n - n % 4;
+ __m128 mInput;
+ __m128 mK1;
+ __m128 mK2;
+ __m128 mul1;
+ __m128 mul2;
+
+ __m128 sums1 = _mm_setzero_ps();
+ __m128 sums2 = _mm_setzero_ps();
+ bool k1Aligned = !(reinterpret_cast<uintptr_t>(k1) & 0x0F);
+ bool k2Aligned = !(reinterpret_cast<uintptr_t>(k2) & 0x0F);
+
+#define LOAD_DATA(l1, l2) \
+ mInput = _mm_load_ps(inputP); \
+ mK1 = _mm_##l1##_ps(k1); \
+ mK2 = _mm_##l2##_ps(k2);
+
+#define CONVOLVE_4_SAMPLES \
+ mul1 = _mm_mul_ps(mInput, mK1); \
+ mul2 = _mm_mul_ps(mInput, mK2); \
+ sums1 = _mm_add_ps(sums1, mul1); \
+ sums2 = _mm_add_ps(sums2, mul2); \
+ inputP += 4; \
+ k1 += 4; \
+ k2 += 4;
+
+ if (k1Aligned && k2Aligned) { // both aligned
+ while (inputP < endP) {
+ LOAD_DATA(load, load)
+ CONVOLVE_4_SAMPLES
+ }
+ } else if (!k1Aligned && k2Aligned) { // only k2 aligned
+ while (inputP < endP) {
+ LOAD_DATA(loadu, load)
+ CONVOLVE_4_SAMPLES
+ }
+ } else if (k1Aligned && !k2Aligned) { // only k1 aligned
+ while (inputP < endP) {
+ LOAD_DATA(load, loadu)
+ CONVOLVE_4_SAMPLES
+ }
+ } else { // both non-aligned
+ while (inputP < endP) {
+ LOAD_DATA(loadu, loadu)
+ CONVOLVE_4_SAMPLES
+ }
+ }
+
+ // Add the four lanes of each SSE accumulator into sum1 and sum2.
+ float* groupSumP = reinterpret_cast<float*>(&sums1);
+ sum1 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
+ groupSumP = reinterpret_cast<float*>(&sums2);
+ sum2 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
+
+ n %= 4;
+ while (n) {
+ CONVOLVE_ONE_SAMPLE
+ n--;
+ }
+#else
+ // FIXME: add ARM NEON optimizations for the following. The scalar code-path can probably also be optimized better.
// Optimize size 32 and size 64 kernels by unrolling the while loop.
// A 20 - 30% speed improvement was measured in some cases by using this approach.
@@ -365,6 +437,7 @@
CONVOLVE_ONE_SAMPLE
}
}
+#endif
}
// Linearly interpolate the two "convolutions".
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes