Title: [268539] trunk/Source/WebCore
Revision
268539
Author
[email protected]
Date
2020-10-15 11:21:25 -0700 (Thu, 15 Oct 2020)

Log Message

Vectorize StereoPanner's panToTargetValue()
https://bugs.webkit.org/show_bug.cgi?id=217765

Reviewed by Geoffrey Garen.

Vectorize StereoPanner's panToTargetValue().

No new tests, no Web-facing behavior change.

* platform/audio/StereoPanner.cpp:
(WebCore::StereoPanner::panToTargetValue):
* platform/audio/VectorMath.cpp:
(WebCore::VectorMath::multiplyByScalar):
(WebCore::VectorMath::multiplyByScalarThenAddToOutput):
(WebCore::VectorMath::multiplyByScalarThenAddToVector):
* platform/audio/VectorMath.h:

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (268538 => 268539)


--- trunk/Source/WebCore/ChangeLog	2020-10-15 18:19:42 UTC (rev 268538)
+++ trunk/Source/WebCore/ChangeLog	2020-10-15 18:21:25 UTC (rev 268539)
@@ -1,3 +1,22 @@
+2020-10-15  Chris Dumez  <[email protected]>
+
+        Vectorize StereoPanner's panToTargetValue()
+        https://bugs.webkit.org/show_bug.cgi?id=217765
+
+        Reviewed by Geoffrey Garen.
+
+        Vectorize StereoPanner's panToTargetValue().
+
+        No new tests, no Web-facing behavior change.
+
+        * platform/audio/StereoPanner.cpp:
+        (WebCore::StereoPanner::panToTargetValue):
+        * platform/audio/VectorMath.cpp:
+        (WebCore::VectorMath::multiplyByScalar):
+        (WebCore::VectorMath::multiplyByScalarThenAddToOutput):
+        (WebCore::VectorMath::multiplyByScalarThenAddToVector):
+        * platform/audio/VectorMath.h:
+
 2020-10-15  Chris Lord  <[email protected]>
 
         [GTK][WPE] Add support for smooth scrolling animation with async scrolling

Modified: trunk/Source/WebCore/platform/audio/StereoPanner.cpp (268538 => 268539)


--- trunk/Source/WebCore/platform/audio/StereoPanner.cpp	2020-10-15 18:19:42 UTC (rev 268538)
+++ trunk/Source/WebCore/platform/audio/StereoPanner.cpp	2020-10-15 18:21:25 UTC (rev 268539)
@@ -29,6 +29,7 @@
 
 #if ENABLE(WEB_AUDIO)
 
+#include "VectorMath.h"
 #include <wtf/MathExtras.h>
 
 namespace WebCore {
@@ -116,11 +117,9 @@
     
     if (!sourceL || !sourceR || !destinationL || !destinationR)
         return;
-    
+
     float targetPan = clampTo(panValue, -1.0, 1.0);
     
-    int n = framesToProcess;
-    
     if (numberOfInputChannels == 1) {
         double panRadian = (targetPan * 0.5 + 0.5) * piOverTwoDouble;
         
@@ -127,27 +126,20 @@
         double gainL = cos(panRadian);
         double gainR = sin(panRadian);
         
-        while (n--) {
-            float inputL = *sourceL++;
-            *destinationL++ = static_cast<float>(inputL * gainL);
-            *destinationR++ = static_cast<float>(inputL * gainR);
-        }
+        VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess);
+        VectorMath::multiplyByScalar(sourceL, gainR, destinationR, framesToProcess);
     } else {
         double panRadian = (targetPan <= 0 ? targetPan + 1 : targetPan) * piOverTwoDouble;
         
         double gainL = cos(panRadian);
         double gainR = sin(panRadian);
-        
-        while (n--) {
-            float inputL = *sourceL++;
-            float inputR = *sourceR++;
-            if (targetPan <= 0) {
-                *destinationL++ = static_cast<float>(inputL + inputR * gainL);
-                *destinationR++ = static_cast<float>(inputR * gainR);
-            } else {
-                *destinationL++ = static_cast<float>(inputL * gainL);
-                *destinationR++ = static_cast<float>(inputR + inputL * gainR);
-            }
+
+        if (targetPan <= 0) {
+            VectorMath::multiplyByScalarThenAddToVector(sourceR, gainL, sourceL, destinationL, framesToProcess);
+            VectorMath::multiplyByScalar(sourceR, gainR, destinationR, framesToProcess);
+        } else {
+            VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess);
+            VectorMath::multiplyByScalarThenAddToVector(sourceL, gainR, sourceR, destinationR, framesToProcess);
         }
     }
 }

Modified: trunk/Source/WebCore/platform/audio/VectorMath.cpp (268538 => 268539)


--- trunk/Source/WebCore/platform/audio/VectorMath.cpp	2020-10-15 18:19:42 UTC (rev 268538)
+++ trunk/Source/WebCore/platform/audio/VectorMath.cpp	2020-10-15 18:21:25 UTC (rev 268539)
@@ -52,9 +52,9 @@
 #if USE(ACCELERATE)
 // On the Mac we use the highly optimized versions in Accelerate.framework
 
-void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
 {
-    vDSP_vsmul(inputVector, 1, &scale, outputVector, 1, numberOfElementsToProcess);
+    vDSP_vsmul(inputVector, 1, &scalar, outputVector, 1, numberOfElementsToProcess);
 }
 
 void add(const float* inputVector1, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
@@ -86,11 +86,16 @@
     vDSP_zvmul(&sc1, 1, &sc2, 1, &dest, 1, numberOfElementsToProcess, 1);
 }
 
-void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
 {
-    vDSP_vsma(inputVector, 1, &scale, outputVector, 1, outputVector, 1, numberOfElementsToProcess);
+    vDSP_vsma(inputVector, 1, &scalar, outputVector, 1, outputVector, 1, numberOfElementsToProcess);
 }
 
+void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
+{
+    vDSP_vsma(inputVector1, 1, &scalar, inputVector2, 1, outputVector, 1, numberOfElementsToProcess);
+}
+
 void addVectorsThenMultiplyByScalar(const float* inputVector1, const float* inputVector2, float scalar, float* outputVector, size_t numberOfElementsToProcess)
 {
     vDSP_vasm(inputVector1, 1, inputVector2, 1, &scalar, outputVector, 1, numberOfElementsToProcess);
@@ -128,14 +133,20 @@
     return !(reinterpret_cast<uintptr_t>(vector) & 0x0F);
 }
 
-void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
+{
+    multiplyByScalar(inputVector1, scalar, outputVector, numberOfElementsToProcess); // Overwrite outputVector with inputVector1 * scalar; accumulating here would mix in its previous contents.
+    add(outputVector, inputVector2, outputVector, numberOfElementsToProcess); // outputVector = inputVector1 * scalar + inputVector2. NOTE(review): assumes outputVector does not alias inputVector2 - confirm against callers.
+}
+
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
+{
     size_t n = numberOfElementsToProcess;
 
 #if CPU(X86_SSE2)
     // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
     while (!is16ByteAligned(inputVector) && n) {
-        *outputVector += scale * *inputVector;
+        *outputVector += scalar * *inputVector;
         inputVector++;
         outputVector++;
         n--;
@@ -148,7 +159,7 @@
     __m128 pSource;
     __m128 dest;
     __m128 temp;
-    __m128 mScale = _mm_set_ps1(scale);
+    __m128 mScale = _mm_set_ps1(scalar);
 
     bool destAligned = is16ByteAligned(outputVector);
 
@@ -174,7 +185,7 @@
     size_t tailFrames = n % 4;
     const float* endP = outputVector + n - tailFrames;
 
-    float32x4_t k = vdupq_n_f32(scale);
+    float32x4_t k = vdupq_n_f32(scalar);
     while (outputVector < endP) {
         float32x4_t source = vld1q_f32(inputVector);
         float32x4_t dest = vld1q_f32(outputVector);
@@ -188,13 +199,13 @@
     n = tailFrames;
 #endif
     while (n--) {
-        *outputVector += *inputVector * scale;
+        *outputVector += *inputVector * scalar;
         ++inputVector;
         ++outputVector;
     }
 }
 
-void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
 {
     size_t n = numberOfElementsToProcess;
 
@@ -201,7 +212,7 @@
 #if CPU(X86_SSE2)
     // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
     while (!is16ByteAligned(inputVector) && n) {
-        *outputVector = scale * *inputVector;
+        *outputVector = scalar * *inputVector;
         inputVector++;
         outputVector++;
         n--;
@@ -209,7 +220,7 @@
 
     // Now the inputVector address is aligned and start to apply SSE.
     size_t group = n / 4;
-    __m128 mScale = _mm_set_ps1(scale);
+    __m128 mScale = _mm_set_ps1(scalar);
     __m128* pSource;
     __m128* pDest;
     __m128 dest;
@@ -243,7 +254,7 @@
 
     while (outputVector < endP) {
         float32x4_t source = vld1q_f32(inputVector);
-        vst1q_f32(outputVector, vmulq_n_f32(source, scale));
+        vst1q_f32(outputVector, vmulq_n_f32(source, scalar));
 
         inputVector += 4;
         outputVector += 4;
@@ -251,7 +262,7 @@
     n = tailFrames;
 #endif
     while (n--) {
-        *outputVector = scale * *inputVector;
+        *outputVector = scalar * *inputVector;
         ++inputVector;
         ++outputVector;
     }

Modified: trunk/Source/WebCore/platform/audio/VectorMath.h (268538 => 268539)


--- trunk/Source/WebCore/platform/audio/VectorMath.h	2020-10-15 18:19:42 UTC (rev 268538)
+++ trunk/Source/WebCore/platform/audio/VectorMath.h	2020-10-15 18:21:25 UTC (rev 268539)
@@ -31,15 +31,20 @@
 
 namespace VectorMath {
 
-// Multiples inputVector by scalar then adds the result to outputVector (vsma).
+// Multiplies inputVector by scalar then adds the result to outputVector (simplified vsma).
 // for (n = 0; n < numberOfElementsToProcess; ++n)
-//     outputVector[n] += inputVector[n] * scale;
-void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess);
+//     outputVector[n] += inputVector[n] * scalar;
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 
+// Adds a vector inputVector2 to the product of a scalar value and a single-precision vector inputVector1 (vsma).
+// for (n = 0; n < numberOfElementsToProcess; ++n)
+//     outputVector[n] = inputVector1[n] * scalar + inputVector2[n];
+void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);
+
 // Multiplies the sum of two vectors by a scalar value (vasm).
 void addVectorsThenMultiplyByScalar(const float* inputVector1, const float* inputVector2, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 
-void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess);
+void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 void addScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 void add(const float* inputVector1, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);
 
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to