- Revision
- 210496
- Author
- [email protected]
- Date
- 2017-01-08 09:44:57 -0800 (Sun, 08 Jan 2017)
Log Message
Introduce CPU(X86_SSE2) instead of various SSE2 checks
https://bugs.webkit.org/show_bug.cgi?id=166808
Reviewed by Michael Catanzaro.
Now copyLCharsFromUCharSource can use SSE2 implementation on non-Darwin
OSes, and all SSE2 code paths are available for MSVC on x86 if /arch:SSE2
or higher is enabled, and for MSVC on x86_64.
Source/WebCore:
No new tests needed.
* platform/audio/SincResampler.cpp:
(WebCore::SincResampler::process):
* platform/audio/VectorMath.cpp:
(WebCore::VectorMath::vsma):
(WebCore::VectorMath::vsmul):
(WebCore::VectorMath::vadd):
(WebCore::VectorMath::vmul):
(WebCore::VectorMath::zvmul):
(WebCore::VectorMath::vsvesq):
(WebCore::VectorMath::vmaxmgv):
Source/WTF:
* wtf/Platform.h:
* wtf/text/ASCIIFastPath.h:
(WTF::copyLCharsFromUCharSource):
Modified Paths
Diff
Modified: trunk/Source/WTF/ChangeLog (210495 => 210496)
--- trunk/Source/WTF/ChangeLog 2017-01-08 06:02:25 UTC (rev 210495)
+++ trunk/Source/WTF/ChangeLog 2017-01-08 17:44:57 UTC (rev 210496)
@@ -1,3 +1,18 @@
+2017-01-08 Konstantin Tokarev <[email protected]>
+
+ Introduce CPU(X86_SSE2) instead of various SSE2 checks
+ https://bugs.webkit.org/show_bug.cgi?id=166808
+
+ Reviewed by Michael Catanzaro.
+
+ Now copyLCharsFromUCharSource can use SSE2 implementation on non-Darwin
+ OSes, and all SSE2 code paths are available for MSVC on x86 if /arch:SSE2
+ or higher is enabled, and for MSVC on x86_64.
+
+ * wtf/Platform.h:
+ * wtf/text/ASCIIFastPath.h:
+ (WTF::copyLCharsFromUCharSource):
+
2017-01-05 Myles C. Maxfield <[email protected]>
Carets can split up marriages and families
Modified: trunk/Source/WTF/wtf/Platform.h (210495 => 210496)
--- trunk/Source/WTF/wtf/Platform.h 2017-01-08 06:02:25 UTC (rev 210495)
+++ trunk/Source/WTF/wtf/Platform.h 2017-01-08 17:44:57 UTC (rev 210496)
@@ -162,12 +162,18 @@
|| defined(_X86_) \
|| defined(__THW_INTEL)
#define WTF_CPU_X86 1
+
+#if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+#define WTF_CPU_X86_SSE2 1
#endif
+#endif
+
/* CPU(X86_64) - AMD64 / Intel64 / x86_64 64-bit */
#if defined(__x86_64__) \
|| defined(_M_X64)
#define WTF_CPU_X86_64 1
+#define WTF_CPU_X86_SSE2 1
#endif
/* CPU(ARM64) - Apple */
Modified: trunk/Source/WTF/wtf/text/ASCIIFastPath.h (210495 => 210496)
--- trunk/Source/WTF/wtf/text/ASCIIFastPath.h 2017-01-08 06:02:25 UTC (rev 210495)
+++ trunk/Source/WTF/wtf/text/ASCIIFastPath.h 2017-01-08 17:44:57 UTC (rev 210496)
@@ -27,7 +27,7 @@
#include <wtf/StdLibExtras.h>
#include <wtf/text/LChar.h>
-#if OS(DARWIN) && (CPU(X86) || CPU(X86_64))
+#if CPU(X86_SSE2)
#include <emmintrin.h>
#endif
@@ -109,7 +109,7 @@
inline void copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length)
{
-#if OS(DARWIN) && (CPU(X86) || CPU(X86_64))
+#if CPU(X86_SSE2)
const uintptr_t memoryAccessSize = 16; // Memory accesses on 16 byte (128 bit) alignment
const uintptr_t memoryAccessMask = memoryAccessSize - 1;
Modified: trunk/Source/WebCore/ChangeLog (210495 => 210496)
--- trunk/Source/WebCore/ChangeLog 2017-01-08 06:02:25 UTC (rev 210495)
+++ trunk/Source/WebCore/ChangeLog 2017-01-08 17:44:57 UTC (rev 210496)
@@ -1,3 +1,27 @@
+2017-01-08 Konstantin Tokarev <[email protected]>
+
+ Introduce CPU(X86_SSE2) instead of various SSE2 checks
+ https://bugs.webkit.org/show_bug.cgi?id=166808
+
+ Reviewed by Michael Catanzaro.
+
+ Now copyLCharsFromUCharSource can use SSE2 implementation on non-Darwin
+ OSes, and all SSE2 code paths are available for MSVC on x86 if /arch:SSE2
+ or higher is enabled, and for MSVC on x86_64.
+
+ No new tests needed.
+
+ * platform/audio/SincResampler.cpp:
+ (WebCore::SincResampler::process):
+ * platform/audio/VectorMath.cpp:
+ (WebCore::VectorMath::vsma):
+ (WebCore::VectorMath::vsmul):
+ (WebCore::VectorMath::vadd):
+ (WebCore::VectorMath::vmul):
+ (WebCore::VectorMath::zvmul):
+ (WebCore::VectorMath::vsvesq):
+ (WebCore::VectorMath::vmaxmgv):
+
2017-01-07 Simon Fraser <[email protected]>
Avoid triggering rebuilds for minor changes of CSSProperties.json
Modified: trunk/Source/WebCore/platform/audio/SincResampler.cpp (210495 => 210496)
--- trunk/Source/WebCore/platform/audio/SincResampler.cpp 2017-01-08 06:02:25 UTC (rev 210495)
+++ trunk/Source/WebCore/platform/audio/SincResampler.cpp 2017-01-08 17:44:57 UTC (rev 210496)
@@ -35,7 +35,7 @@
#include "AudioBus.h"
#include <wtf/MathExtras.h>
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
#include <emmintrin.h>
#endif
@@ -260,7 +260,7 @@
{
float input;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
// If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
while ((reinterpret_cast<uintptr_t>(inputP) & 0x0F) && n) {
CONVOLVE_ONE_SAMPLE
Modified: trunk/Source/WebCore/platform/audio/VectorMath.cpp (210495 => 210496)
--- trunk/Source/WebCore/platform/audio/VectorMath.cpp 2017-01-08 06:02:25 UTC (rev 210495)
+++ trunk/Source/WebCore/platform/audio/VectorMath.cpp 2017-01-08 17:44:57 UTC (rev 210496)
@@ -32,7 +32,7 @@
#include <Accelerate/Accelerate.h>
#endif
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
#include <emmintrin.h>
#endif
@@ -134,7 +134,7 @@
{
int n = framesToProcess;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
if ((sourceStride == 1) && (destStride == 1)) {
float k = *scale;
@@ -207,7 +207,7 @@
{
int n = framesToProcess;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
if ((sourceStride == 1) && (destStride == 1)) {
float k = *scale;
@@ -278,7 +278,7 @@
sourceP += sourceStride;
destP += destStride;
}
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
}
#endif
}
@@ -287,7 +287,7 @@
{
int n = framesToProcess;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
// If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) {
@@ -390,7 +390,7 @@
source2P += sourceStride2;
destP += destStride;
}
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
}
#endif
}
@@ -400,7 +400,7 @@
int n = framesToProcess;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
// If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) {
@@ -473,7 +473,7 @@
void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
{
unsigned i = 0;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
// Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
// Otherwise, fall through to the scalar code below.
if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F)
@@ -531,7 +531,7 @@
int n = framesToProcess;
float sum = 0;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
if (sourceStride == 1) {
// If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
@@ -596,7 +596,7 @@
int n = framesToProcess;
float max = 0;
-#ifdef __SSE2__
+#if CPU(X86_SSE2)
if (sourceStride == 1) {
// If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {