Diff
Modified: trunk/JSTests/ChangeLog (248937 => 248938)
--- trunk/JSTests/ChangeLog 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/JSTests/ChangeLog 2019-08-21 06:30:05 UTC (rev 248938)
@@ -1,3 +1,46 @@
+2019-08-20 Justin Michaud <justin_mich...@apple.com>
+
+ Identify memcpy loops in b3
+ https://bugs.webkit.org/show_bug.cgi?id=200181
+
+ Reviewed by Saam Barati.
+
+ * microbenchmarks/memcpy-loop.js: Added.
+ (doTest):
+ (let.arr1):
+ * microbenchmarks/memcpy-typed-loop-large.js: Added.
+ (doTest):
+ (let.arr1.new.Int32Array.1000000.let.arr2.new.Int32Array.1000000):
+ (arr2):
+ * microbenchmarks/memcpy-typed-loop-small.js: Added.
+ (doTest):
+ (16.let.arr1.new.Int32Array.size.let.arr2.new.Int32Array.size):
+ (16.arr2):
+ * microbenchmarks/memcpy-typed-loop-speculative.js: Added.
+ (doTest):
+ (let.arr1.new.Int32Array.10.let.arr2.new.Int32Array.10):
+ (arr2):
+ * microbenchmarks/memcpy-wasm-large.js: Added.
+ (typeof.WebAssembly.string_appeared_here.eq):
+ (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array):
+ * microbenchmarks/memcpy-wasm-medium.js: Added.
+ (typeof.WebAssembly.string_appeared_here.eq):
+ (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array):
+ * microbenchmarks/memcpy-wasm-small.js: Added.
+ (typeof.WebAssembly.string_appeared_here.eq):
+ (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array):
+ * microbenchmarks/memcpy-wasm.js: Added.
+ (typeof.WebAssembly.string_appeared_here.eq):
+ (typeof.WebAssembly.string_appeared_here.const.1.new.WebAssembly.Instance.new.WebAssembly.Module.new.Uint8Array):
+ * stress/memcpy-typed-loops.js: Added.
+ (noLoop):
+ (invalidStart):
+ (const.size.10.let.arr1.new.Int32Array.size.let.arr2.new.Int32Array.size):
+ (arr2):
+ * wasm/function-tests/memcpy-wasm-loop.js: Added.
+ (0.GetLocal.3.I32Const.1.I32Add.SetLocal.3.Br.1.End.End.End.WebAssembly):
+ (string_appeared_here):
+
2019-08-20 Yusuke Suzuki <ysuz...@apple.com>
[JSC] Array.prototype.toString should not get "join" function each time
Added: trunk/JSTests/microbenchmarks/memcpy-loop.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-loop.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-loop.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,26 @@
+function doTest(arr1) {
+ let arr2 = []
+ for (let i=0; i<arr1.length; ++i) {
+ arr2[i] = arr1[i]
+ }
+ return arr2
+}
+noInline(doTest);
+
+let arr1 = []
+for (let i=0; i<1000; ++i) {
+ arr1[i] = { test: i }
+}
+
+for (let i=0; i<100000; ++i) doTest(arr1)
+
+if (doTest(arr1) == arr1)
+ throw "Error: did not copy"
+
+if (doTest(arr1)[5].test != 5)
+ throw "Error: bad copy"
+
+doTest(arr1)[5].test = 42
+
+if (arr1[5].test != 42)
+ throw "Error: bad copy"
Added: trunk/JSTests/microbenchmarks/memcpy-typed-loop-large.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-typed-loop-large.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-typed-loop-large.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,27 @@
+function doTest(arr1, arr2) {
+ if (arr1.length != arr2.length)
+ return []
+ for (let i=0; i<arr1.length; ++i) {
+ arr2[i] = arr1[i]
+ }
+ return arr2
+}
+noInline(doTest);
+
+for (let i=0; i<5000; ++i) doTest([], [])
+
+let arr1 = new Int32Array(1000000)
+let arr2 = new Int32Array(1000000)
+for (let i=0; i<arr1.length; ++i) {
+ arr1[i] = i
+}
+
+for (let i=0; i<1000; ++i) doTest(arr1, arr2)
+
+arr2 = new Int32Array(arr1.length)
+doTest(arr1, arr2)
+
+for (let i=0; i<arr1.length; ++i) {
+ if (arr2[i] != arr1[i])
+ throw "Error: bad copy: " + i + " " + arr1[i] + " " + arr2[i]
+}
Added: trunk/JSTests/microbenchmarks/memcpy-typed-loop-small.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-typed-loop-small.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-typed-loop-small.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,27 @@
+function doTest(arr1, arr2) {
+ if (arr1.length != arr2.length)
+ return []
+ for (let i=0; i<arr1.length; ++i) {
+ arr2[i] = arr1[i]
+ }
+ return arr2
+}
+noInline(doTest);
+
+for (size of [1, 3, 8, 10, 12, 16]) {
+ let arr1 = new Int32Array(size)
+ let arr2 = new Int32Array(size)
+ for (let i=0; i<arr1.length; ++i) {
+ arr1[i] = i
+ }
+
+ for (let i=0; i<50000000; ++i) doTest(arr1, arr2)
+
+ arr2 = new Int32Array(arr1.length)
+ doTest(arr1, arr2)
+
+ for (let i=0; i<arr1.length; ++i) {
+ if (arr2[i] != arr1[i])
+ throw "Error: bad copy: " + i + " " + arr1[i] + " " + arr2[i]
+ }
+}
Added: trunk/JSTests/microbenchmarks/memcpy-typed-loop-speculative.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-typed-loop-speculative.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-typed-loop-speculative.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,23 @@
+function doTest(arr1, arr2) {
+ for (let i=0; i<arr1.length; ++i) {
+ arr2[i] = arr1[i]
+ }
+ return arr2
+}
+noInline(doTest);
+
+let arr1 = new Int32Array(10)
+let arr2 = new Int32Array(10)
+for (let i=0; i<arr1.length; ++i) {
+ arr1[i] = i
+}
+
+for (let i=0; i<500000; ++i) doTest(arr1, arr2)
+
+arr2 = new Int32Array(arr1.length)
+doTest(arr1, arr2)
+
+for (let i=0; i<arr1.length; ++i) {
+ if (arr2[i] != arr1[i])
+ throw "Error: bad copy: " + i + " " + arr1[i] + " " + arr2[i]
+}
Added: trunk/JSTests/microbenchmarks/memcpy-wasm-large.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-wasm-large.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-wasm-large.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,34 @@
+// Source in wasm/stress/memcpy-wasm
+if (typeof WebAssembly === "object") {
+
+function eq(a, b) {
+ if (a !== b)
+ throw new Error("Not equal: " + a + " " + b);
+}
+
+let pages = 64
+let memory = new WebAssembly.Memory({initial: pages, maximum: pages});
+
+let i32 = new Int32Array(memory.buffer);
+for (let i = 0; i < 100000; i++) {
+ i32[i] = i;
+}
+
+const $1 = new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([
+0,97,115,109,1,0,0,0,1,7,1,96,3,127,127,127,0,2,12,1,2,106,115,3,109,101,109,2,1,pages,pages,3,2,1,0,6,1,0,7,13,1,9,100,111,95,109,101,109,99,112,121,0,0,10,57,1,55,1,1,127,65,0,33,3,3,64,2,64,32,2,32,3,70,13,0,32,1,65,4,108,32,3,65,4,108,106,32,0,32,3,65,4,108,106,40,0,0,54,0,0,32,3,65,1,106,33,3,12,1,11,11,11
+])), { js: { mem: memory } });
+
+for (let i=0; i<500; ++i)
+ $1.exports.do_memcpy(0,50000,40000);
+
+for (let i = 0; i < 50000; i++) {
+ eq(i32[i], i);
+}
+for (let i = 50000; i < 50000+40000; i++) {
+ eq(i32[i], i-50000);
+}
+for (let i = 50000+40000; i < 100000; i++) {
+ eq(i32[i], i);
+}
+
+}
Added: trunk/JSTests/microbenchmarks/memcpy-wasm-medium.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-wasm-medium.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-wasm-medium.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,35 @@
+// Source in wasm/stress/memcpy-wasm
+
+if (typeof WebAssembly === "object") {
+
+function eq(a, b) {
+ if (a !== b)
+ throw new Error("Not equal: " + a + " " + b);
+}
+
+let memory = new WebAssembly.Memory({initial:1, maximum:1});
+
+let i32 = new Int32Array(memory.buffer);
+for (let i = 0; i < 10; i++) {
+ i32[i] = i;
+}
+
+const $1 = new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([
+0,97,115,109,1,0,0,0,1,7,1,96,3,127,127,127,0,2,12,1,2,106,115,3,109,101,109,2,1,1,1,3,2,1,0,6,1,0,7,13,1,9,100,111,95,109,101,109,99,112,121,0,0,10,57,1,55,1,1,127,65,0,33,3,3,64,2,64,32,2,32,3,70,13,0,32,1,65,4,108,32,3,65,4,108,106,32,0,32,3,65,4,108,106,40,0,0,54,0,0,32,3,65,1,106,33,3,12,1,11,11,11
+])), { js: { mem: memory } });
+
+for (let i=0; i<1000000; ++i)
+ $1.exports.do_memcpy(0,5,5);
+
+eq(i32[0], 0);
+eq(i32[1], 1);
+eq(i32[2], 2);
+eq(i32[3], 3);
+eq(i32[4], 4);
+eq(i32[5], 0);
+eq(i32[6], 1);
+eq(i32[7], 2);
+eq(i32[8], 3);
+eq(i32[9], 4);
+
+}
Added: trunk/JSTests/microbenchmarks/memcpy-wasm-small.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-wasm-small.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-wasm-small.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,28 @@
+// Source in wasm/stress/memcpy-wasm
+if (typeof WebAssembly === "object") {
+
+function eq(a, b) {
+ if (a !== b)
+ throw new Error("Not equal: " + a + " " + b);
+}
+
+let memory = new WebAssembly.Memory({initial:1, maximum:1});
+
+let i32 = new Int32Array(memory.buffer);
+for (let i = 0; i < 10; i++) {
+ i32[i] = i;
+}
+
+const $1 = new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([
+0,97,115,109,1,0,0,0,1,7,1,96,3,127,127,127,0,2,12,1,2,106,115,3,109,101,109,2,1,1,1,3,2,1,0,6,1,0,7,13,1,9,100,111,95,109,101,109,99,112,121,0,0,10,57,1,55,1,1,127,65,0,33,3,3,64,2,64,32,2,32,3,70,13,0,32,1,65,4,108,32,3,65,4,108,106,32,0,32,3,65,4,108,106,40,0,0,54,0,0,32,3,65,1,106,33,3,12,1,11,11,11
+])), { js: { mem: memory } });
+
+for (let i=0; i<1000000; ++i)
+ $1.exports.do_memcpy(0,2,2);
+
+eq(i32[0], 0);
+eq(i32[1], 1);
+eq(i32[2], 0);
+eq(i32[3], 1);
+
+}
Added: trunk/JSTests/microbenchmarks/memcpy-wasm.js (0 => 248938)
--- trunk/JSTests/microbenchmarks/memcpy-wasm.js (rev 0)
+++ trunk/JSTests/microbenchmarks/memcpy-wasm.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,33 @@
+// Source in wasm/stress/memcpy-wasm
+if (typeof WebAssembly === "object") {
+
+function eq(a, b) {
+ if (a !== b)
+ throw new Error("Not equal: " + a + " " + b);
+}
+
+let memory = new WebAssembly.Memory({initial:1, maximum:1});
+
+let i32 = new Int32Array(memory.buffer);
+for (let i = 0; i < 2000; i++) {
+ i32[i] = i;
+}
+
+const $1 = new WebAssembly.Instance(new WebAssembly.Module(new Uint8Array([
+0,97,115,109,1,0,0,0,1,7,1,96,3,127,127,127,0,2,12,1,2,106,115,3,109,101,109,2,1,1,1,3,2,1,0,6,1,0,7,13,1,9,100,111,95,109,101,109,99,112,121,0,0,10,57,1,55,1,1,127,65,0,33,3,3,64,2,64,32,2,32,3,70,13,0,32,1,65,4,108,32,3,65,4,108,106,32,0,32,3,65,4,108,106,40,0,0,54,0,0,32,3,65,1,106,33,3,12,1,11,11,11
+])), { js: { mem: memory } });
+
+for (let i=0; i<100000; ++i)
+ $1.exports.do_memcpy(0,500,300);
+
+for (let i = 0; i < 500; i++) {
+ eq(i32[i], i);
+}
+for (let i = 500; i < 500+300; i++) {
+ eq(i32[i], i-500);
+}
+for (let i = 500+300; i < 1000; i++) {
+ eq(i32[i], i);
+}
+
+}
Added: trunk/JSTests/stress/memcpy-typed-loops.js (0 => 248938)
--- trunk/JSTests/stress/memcpy-typed-loops.js (rev 0)
+++ trunk/JSTests/stress/memcpy-typed-loops.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,39 @@
+function noLoop(arr1, arr2) {
+ let i=0
+ if (arr1.size%2==0)
+ i = 0
+ else i = 0
+ arr2[i] = arr1[i]
+ return arr2
+}
+noInline(noLoop);
+
+function invalidStart(arr1, arr2) {
+ if (arr1.length != arr2.length)
+ return []
+ let i = 0
+ do {
+ ++i
+ arr2[i] = arr1[i]
+ } while (i < arr1.length-1)
+ return arr2
+}
+noInline(invalidStart);
+
+const size = 10
+let arr1 = new Int32Array(size)
+let arr2 = new Int32Array(size)
+for (let i=0; i<arr1.length; ++i) {
+ arr1[i] = i
+}
+
+for (let i=0; i<10000000; ++i) noLoop(arr1, arr2)
+for (let i=0; i<10000000; ++i) invalidStart(arr1, arr2)
+
+arr2 = new Int32Array(arr1.length)
+invalidStart(arr1, arr2)
+
+for (let i=1; i<arr1.length; ++i) {
+ if (arr2[i] != arr1[i] || arr2[0] != 0)
+ throw "Error: bad copy: " + i + " " + arr1[i] + " " + arr2[i]
+}
Added: trunk/JSTests/wasm/function-tests/memcpy-wasm-loop.js (0 => 248938)
--- trunk/JSTests/wasm/function-tests/memcpy-wasm-loop.js (rev 0)
+++ trunk/JSTests/wasm/function-tests/memcpy-wasm-loop.js 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,90 @@
+import * as assert from '../assert.js';
+import Builder from '../Builder.js';
+
+let memory = new WebAssembly.Memory({initial:1, maximum:1});
+
+let i32 = new Int32Array(memory.buffer);
+for (let i = 0; i < 100; i++) {
+ i32[i] = i;
+}
+
+const $1 = new WebAssembly.Instance(new WebAssembly.Module((new Builder())
+ .Type().End()
+ .Import()
+ .Memory("js", "mem", {initial:1, maximum:1})
+ .End()
+ .Function().End()
+ .Global().End()
+ .Export()
+ .Function("do_memcpy")
+ .End()
+ .Code()
+ .Function("do_memcpy", { params: ["i32","i32","i32"], ret: "void" }, ["i32"])
+ .I32Const(0)
+ .SetLocal(3)
+ .Loop("void")
+ .Block("void", b =>
+ b.GetLocal(2)
+ .GetLocal(3)
+ .I32Eq()
+ .BrIf(0)
+
+ .GetLocal(1)
+ .I32Const(4)
+ .I32Mul()
+ .GetLocal(3)
+ .I32Const(4)
+ .I32Mul()
+ .I32Add()
+
+ .GetLocal(0)
+ // Intentional bug: no multiply here
+ .GetLocal(3)
+ .I32Const(4)
+ .I32Mul()
+ .I32Add()
+ .I32Load(0,0)
+
+ .I32Store(0,0)
+
+ .GetLocal(3)
+ .I32Const(1)
+ .I32Add()
+ .SetLocal(3)
+ .Br(1)
+ )
+ .End()
+ .End()
+ .End().WebAssembly().get()), { js: { mem: memory } });
+
+for (let i=0; i<500; ++i)
+ $1.exports.do_memcpy(0,50,30);
+
+for (let i = 0; i < 50; i++) {
+ assert.eq(i32[i], i);
+}
+for (let i = 50; i < 50+30; i++) {
+ assert.eq(i32[i], i-50);
+}
+for (let i = 50+30; i < 100; i++) {
+ assert.eq(i32[i], i);
+}
+
+$1.exports.do_memcpy(0,5,10);
+for (let i = 0; i < 5; i++) {
+ assert.eq(i32[i], i);
+}
+for (let i = 5; i < 10; i++) {
+ assert.eq(i32[i], i-5);
+}
+for (let i = 10; i < 15; i++) {
+ assert.eq(i32[i], i-10);
+}
+for (let i = 15; i < 20; i++) {
+ assert.eq(i32[i], i);
+}
+
+assert.throws(() => $1.exports.do_memcpy(0,16384-5,6), Error, "Out of bounds memory access (evaluating 'func(...args)')")
+for (let i = 0; i < 5; i++) {
+ assert.eq(i32[16384-5 + i], i);
+}
Modified: trunk/Source/_javascript_Core/ChangeLog (248937 => 248938)
--- trunk/Source/_javascript_Core/ChangeLog 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/ChangeLog 2019-08-21 06:30:05 UTC (rev 248938)
@@ -1,3 +1,39 @@
+2019-08-20 Justin Michaud <justin_mich...@apple.com>
+
+ Identify memcpy loops in b3
+ https://bugs.webkit.org/show_bug.cgi?id=200181
+
+ Reviewed by Saam Barati.
+
+ Add a new pass in B3 to identify one type of forward byte copy loop and replace it with a call to a custom version of memcpy
+ that will not cause GC tearing and have the correct behaviour when overlapping regions are passed in.
+
+ Microbenchmarks show memcpy-typed-loop-large is about 6x faster, and everything else is neutral. The optimization is disabled
+ on arm for now, until we add a memcpy implementation for it.
+
+ * _javascript_Core.xcodeproj/project.pbxproj:
+ * Sources.txt:
+ * b3/B3Generate.cpp:
+ (JSC::B3::generateToAir):
+ * b3/B3ReduceLoopStrength.cpp: Added.
+ (JSC::B3::fastForwardCopy32):
+ (JSC::B3::ReduceLoopStrength::AddrInfo::appendAddr):
+ (JSC::B3::ReduceLoopStrength::ReduceLoopStrength):
+ (JSC::B3::ReduceLoopStrength::reduceByteCopyLoopsToMemcpy):
+ (JSC::B3::ReduceLoopStrength::hoistValue):
+ (JSC::B3::ReduceLoopStrength::run):
+ (JSC::B3::reduceLoopStrength):
+ * b3/B3ReduceLoopStrength.h: Added.
+ * b3/testb3.h:
+ * b3/testb3_1.cpp:
+ (run):
+ * b3/testb3_8.cpp:
+ (testFastForwardCopy32):
+ (testByteCopyLoop):
+ (testByteCopyLoopStartIsLoopDependent):
+ (testByteCopyLoopBoundIsLoopDependent):
+ (addCopyTests):
+
2019-08-20 Devin Rousso <drou...@apple.com>
Unreviewed, speculative build fix for High Sierra after r248925
Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (248937 => 248938)
--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2019-08-21 06:30:05 UTC (rev 248938)
@@ -1175,6 +1175,7 @@
70ECA6061AFDBEA200449739 /* JSTemplateObjectDescriptor.h in Headers */ = {isa = PBXBuildFile; fileRef = 70ECA6011AFDBEA200449739 /* JSTemplateObjectDescriptor.h */; settings = {ATTRIBUTES = (Private, ); }; };
70ECA6091AFDBEA200449739 /* TemplateObjectDescriptor.h in Headers */ = {isa = PBXBuildFile; fileRef = 70ECA6041AFDBEA200449739 /* TemplateObjectDescriptor.h */; settings = {ATTRIBUTES = (Private, ); }; };
72AAF7CE1D0D31B3005E60BE /* JSCustomGetterSetterFunction.h in Headers */ = {isa = PBXBuildFile; fileRef = 72AAF7CC1D0D318B005E60BE /* JSCustomGetterSetterFunction.h */; };
+ 73E3799422E0EF6500933565 /* B3ReduceLoopStrength.h in Headers */ = {isa = PBXBuildFile; fileRef = 73E3799322E0EF4F00933565 /* B3ReduceLoopStrength.h */; };
7593C898BE714A64BE93A6E7 /* WasmContextInlines.h in Headers */ = {isa = PBXBuildFile; fileRef = A27958D7FA1142B0AC9E364D /* WasmContextInlines.h */; settings = {ATTRIBUTES = (Private, ); }; };
790081391E95A8EC0052D7CD /* WasmModule.h in Headers */ = {isa = PBXBuildFile; fileRef = 790081371E95A8EC0052D7CD /* WasmModule.h */; settings = {ATTRIBUTES = (Private, ); }; };
7905BB691D12050E0019FE57 /* InlineAccess.h in Headers */ = {isa = PBXBuildFile; fileRef = 7905BB671D12050E0019FE57 /* InlineAccess.h */; settings = {ATTRIBUTES = (Private, ); }; };
@@ -3927,6 +3928,8 @@
70ECA6041AFDBEA200449739 /* TemplateObjectDescriptor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TemplateObjectDescriptor.h; sourceTree = "<group>"; };
72AAF7CB1D0D318B005E60BE /* JSCustomGetterSetterFunction.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSCustomGetterSetterFunction.cpp; sourceTree = "<group>"; };
72AAF7CC1D0D318B005E60BE /* JSCustomGetterSetterFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSCustomGetterSetterFunction.h; sourceTree = "<group>"; };
+ 73E3799322E0EF4F00933565 /* B3ReduceLoopStrength.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = B3ReduceLoopStrength.h; path = b3/B3ReduceLoopStrength.h; sourceTree = "<group>"; };
+ 73E3799522E0EF9100933565 /* B3ReduceLoopStrength.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = B3ReduceLoopStrength.cpp; path = b3/B3ReduceLoopStrength.cpp; sourceTree = "<group>"; };
77B25CB2C3094A92A38E1DB3 /* JSModuleLoader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSModuleLoader.h; sourceTree = "<group>"; };
790081361E95A8EC0052D7CD /* WasmModule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WasmModule.cpp; sourceTree = "<group>"; };
790081371E95A8EC0052D7CD /* WasmModule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WasmModule.h; sourceTree = "<group>"; };
@@ -5621,6 +5624,8 @@
0F725CA61C503DED00AD943A /* B3PureCSE.h */,
43422A641C16221E00E2EB98 /* B3ReduceDoubleToFloat.cpp */,
43422A651C16221E00E2EB98 /* B3ReduceDoubleToFloat.h */,
+ 73E3799522E0EF9100933565 /* B3ReduceLoopStrength.cpp */,
+ 73E3799322E0EF4F00933565 /* B3ReduceLoopStrength.h */,
0FEC85B71BE1462F0080FF74 /* B3ReduceStrength.cpp */,
0FEC85B81BE1462F0080FF74 /* B3ReduceStrength.h */,
0FEC84EA1BDACDAC0080FF74 /* B3SlotBaseValue.cpp */,
@@ -8892,6 +8897,7 @@
0FEC852D1BDACDAC0080FF74 /* B3ProcedureInlines.h in Headers */,
0F725CAA1C503DED00AD943A /* B3PureCSE.h in Headers */,
43422A671C16267800E2EB98 /* B3ReduceDoubleToFloat.h in Headers */,
+ 73E3799422E0EF6500933565 /* B3ReduceLoopStrength.h in Headers */,
0FEC85BD1BE1462F0080FF74 /* B3ReduceStrength.h in Headers */,
0FEC85351BDACDAC0080FF74 /* B3SlotBaseValue.h in Headers */,
0F2BBD961C5FF3F50023EF23 /* B3SparseCollection.h in Headers */,
Modified: trunk/Source/_javascript_Core/Sources.txt (248937 => 248938)
--- trunk/Source/_javascript_Core/Sources.txt 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/Sources.txt 2019-08-21 06:30:05 UTC (rev 248938)
@@ -156,6 +156,7 @@
b3/B3Procedure.cpp
b3/B3PureCSE.cpp
b3/B3ReduceDoubleToFloat.cpp
+b3/B3ReduceLoopStrength.cpp
b3/B3ReduceStrength.cpp
b3/B3SSACalculator.cpp
b3/B3SlotBaseValue.cpp
Modified: trunk/Source/_javascript_Core/b3/B3Generate.cpp (248937 => 248938)
--- trunk/Source/_javascript_Core/b3/B3Generate.cpp 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/b3/B3Generate.cpp 2019-08-21 06:30:05 UTC (rev 248938)
@@ -48,6 +48,7 @@
#include "B3Procedure.h"
#include "B3PureCSE.h"
#include "B3ReduceDoubleToFloat.h"
+#include "B3ReduceLoopStrength.h"
#include "B3ReduceStrength.h"
#include "B3TimingScope.h"
#include "B3Validate.h"
@@ -91,6 +92,7 @@
eliminateCommonSubexpressions(procedure);
eliminateDeadCode(procedure);
inferSwitches(procedure);
+ reduceLoopStrength(procedure);
if (Options::useB3TailDup())
duplicateTails(procedure);
fixSSA(procedure);
Added: trunk/Source/_javascript_Core/b3/B3ReduceLoopStrength.cpp (0 => 248938)
--- trunk/Source/_javascript_Core/b3/B3ReduceLoopStrength.cpp (rev 0)
+++ trunk/Source/_javascript_Core/b3/B3ReduceLoopStrength.cpp 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,544 @@
+/*
+ * Copyright (C) 2019 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "B3ReduceLoopStrength.h"
+
+#if ENABLE(B3_JIT)
+
+#include "B3BlockInsertionSet.h"
+#include "B3ConstPtrValue.h"
+#include "B3EnsureLoopPreHeaders.h"
+#include "B3PhaseScope.h"
+#include "B3ProcedureInlines.h"
+#include "B3ValueInlines.h"
+#include "B3Variable.h"
+#include "B3VariableValue.h"
+#include <wtf/GraphNodeWorklist.h>
+#include <wtf/IndexSet.h>
+#include <wtf/SmallPtrSet.h>
+#include <wtf/Vector.h>
+
+namespace JSC { namespace B3 {
+
+#if CPU(X86_64)
+/*
+ * The semantics of B3 require that loads and stores are not reduced in granularity.
+ * If we would use system memcpy, then it is possible that we would get a byte copy loop,
+ * and this could cause GC tearing.
+ */
+void fastForwardCopy32(uint32_t* dst, const uint32_t* src, size_t size)
+{
+ uint64_t i;
+ int64_t counter;
+ uint64_t tmp, tmp2;
+
+ asm volatile(
+ "test %q[size], %q[size]\t\n"
+ "jz 900f\t\n" // exit
+ "xorq %q[i], %q[i]\t\n"
+ // if size < 8
+ "cmp $8, %q[size]\t\n"
+ "jl 100f\t\n" // backporch
+ // if (dst + size*4 <= src || src + size*4 <= dst)
+ "leaq (%q[src], %q[size], 4), %q[tmp]\t\n"
+ "cmp %q[tmp], %q[dst]\t\n"
+ "jae 500f\t\n" // simd no overlap
+ "leaq (%q[dst], %q[size], 4), %q[tmp]\t\n"
+ "cmp %q[tmp], %q[src]\t\n"
+ "jae 500f\t\n" // simd no overlap
+ "jmp 100f\t\n" // Backporch
+
+ // Backporch (assumes we can write at least one int)
+ "100:\t\n"
+ // counter = (size - i)%4
+ "mov %q[size], %q[counter]\t\n"
+ "subq %q[i], %q[counter]\t\n"
+
+ // If we are a multiple of 4, unroll the loop
+ // We already know we are nonzero
+ "and $3, %q[counter]\t\n"
+ "jz 200f\t\n" // unrolled loop
+
+ "negq %q[counter]\t\n"
+ ".balign 32\t\n"
+ "101:\t\n"
+ "mov (%q[src], %q[i], 4), %k[tmp]\t\n"
+ "mov %k[tmp], (%q[dst], %q[i], 4)\t\n"
+ "incq %q[i]\t\n"
+ "incq %q[counter]\t\n"
+ "jnz 101b\t\n"
+ // Do we still have anything left?
+ "cmpq %q[size], %q[i]\t\n"
+ "je 900f\t\n" // exit
+ // Then go to the unrolled loop
+ "jmp 200f\t\n"
+
+ // Unrolled loop (assumes multiple of 4, can do one iteration)
+ "200:\t\n"
+ "shr $2, %q[counter]\t\n"
+ "negq %q[counter]\t\n"
+ ".balign 32\t\n"
+ "201:\t\n"
+ "mov (%q[src], %q[i], 4), %k[tmp]\t\n"
+ "mov %k[tmp], (%q[dst], %q[i], 4)\t\n"
+ "mov 4(%q[src], %q[i], 4), %k[tmp2]\t\n"
+ "mov %k[tmp2], 4(%q[dst], %q[i], 4)\t\n"
+ "mov 8(%q[src], %q[i], 4), %k[tmp2]\t\n"
+ "mov %k[tmp2], 8(%q[dst], %q[i], 4)\t\n"
+ "mov 12(%q[src], %q[i], 4), %k[tmp]\t\n"
+ "mov %k[tmp], 12(%q[dst], %q[i], 4)\t\n"
+ "addq $4, %q[i]\t\n"
+ "cmpq %q[size], %q[i]\t\n"
+ "jb 201b\t\n"
+ "jmp 900f\t\n" // exit
+
+ // simd no overlap
+ // counter = -(size/8);
+ "500:\t\n"
+ "mov %q[size], %q[counter]\t\n"
+ "shrq $3, %q[counter]\t\n"
+ "negq %q[counter]\t\n"
+ ".balign 32\t\n"
+ "501:\t\n"
+ "movups (%q[src], %q[i], 4), %%xmm0\t\n"
+ "movups 16(%q[src], %q[i], 4), %%xmm1\t\n"
+ "movups %%xmm0, (%q[dst], %q[i], 4)\t\n"
+ "movups %%xmm1, 16(%q[dst], %q[i], 4)\t\n"
+ "addq $8, %q[i]\t\n"
+ "incq %q[counter]\t\n"
+ "jnz 501b\t\n"
+ // if (i == size)
+ "cmpq %q[size], %q[i]\t\n"
+ "je 900f\t\n" // end
+ "jmp 100b\t\n" // backporch
+ "900:\t\n"
+ : [i] "=&c" (i), [counter] "=&r" (counter), [tmp] "=&a" (tmp), [tmp2] "=&r" (tmp2)
+ : [dst] "D" (dst), [src] "S" (src), [size] "r" (size)
+ : "xmm0", "xmm1");
+}
+#endif
+
+class ReduceLoopStrength {
+ static const bool verbose = false;
+
+ struct AddrInfo {
+ Value* appendAddr(Procedure& proc, BasicBlock* block, Value* addr)
+ {
+ block->appendNew<Value>(proc, Identity, addr->origin(), addr);
+ return addr;
+ }
+
+ Value* insideLoopAddr;
+
+ Value* arrayBase;
+ Value* index;
+ };
+
+public:
+ ReduceLoopStrength(Procedure& proc)
+ : m_proc(proc)
+ , m_insertionSet(proc)
+ , m_blockInsertionSet(proc)
+ , m_root(proc.at(0))
+ , m_phiChildren(proc)
+ {
+ }
+
+#if CPU(X86_64)
+ void reduceByteCopyLoopsToMemcpy()
+ {
+ HashSet<Value*> patternMatchedValues;
+
+ Value* store = m_value;
+ if (store->opcode() != Store)
+ return;
+ patternMatchedValues.add(store);
+
+ Value* load = store->child(0);
+ uint32_t elemSize = sizeofType(load->type());
+ if (load->opcode() != Load || elemSize != 4)
+ return;
+ patternMatchedValues.add(load);
+
+ if (verbose)
+ dataLogLn("Found store/load:", *store);
+
+ auto extractArrayInfo = [&] (Value* value) -> Optional<AddrInfo> {
+ patternMatchedValues.add(value);
+
+ Optional<AddrInfo> addr = { AddrInfo() };
+
+ Value* sum = value;
+ if (value->opcode() == ZExt32)
+ sum = sum->child(0);
+
+ if (sum->opcode() != Add || sum->numChildren() != 2)
+ return { };
+
+ addr->insideLoopAddr = sum;
+ auto extractLoopIndexInSubtree = [&] (Value* value) -> Value* {
+ // Match arrBase[i*elemSize]
+ if (value->opcode() == Shl
+ && value->child(0)->opcode() == Phi
+ && value->child(1)->isInt(WTF::fastLog2(elemSize)))
+ return value->child(0);
+ if (value->opcode() == Shl
+ && value->child(0)->opcode() == ZExt32
+ && value->child(0)->child(0)->opcode() == Phi
+ && value->child(1)->isInt(WTF::fastLog2(elemSize)))
+ return value->child(0)->child(0);
+ return nullptr;
+ };
+
+ // Try to find a known pattern applied to a single phi, which we can assume is our loop index.
+ Value* left = extractLoopIndexInSubtree(sum->child(0));
+ Value* right = extractLoopIndexInSubtree(sum->child(1));
+ if (left && !right) {
+ addr->index = left;
+ addr->arrayBase = sum->child(1);
+ } else if (right && !left) {
+ addr->index = right;
+ addr->arrayBase = sum->child(0);
+ } else
+ return { };
+
+ patternMatchedValues.add(addr->index);
+ patternMatchedValues.add(addr->arrayBase);
+
+ return addr;
+ };
+
+ auto destination = extractArrayInfo(store->child(1));
+ auto source = extractArrayInfo(load->child(0));
+
+ if (!source || !destination || source->index != destination->index)
+ return;
+
+ if (destination->arrayBase->type() != Int64 || source->arrayBase->type() != Int64)
+ return;
+
+ if (verbose)
+ dataLogLn("Found array info: ", *source->arrayBase, "[", *source->index, "] = ", *destination->arrayBase, "[", *destination->index, "]");
+
+ // Find the loop header, footer and inner blocks.
+ // Verify that there is one entrance to and one exit from the loop.
+ auto* loop = m_proc.naturalLoops().innerMostLoopOf(m_block);
+ if (!loop)
+ return;
+ BasicBlock* loopHead = loop->header();
+ BasicBlock* loopFoot = nullptr;
+ BasicBlock* loopPreheader = nullptr;
+ BasicBlock* loopPostfooter = nullptr;
+ SmallPtrSet<BasicBlock*> loopInnerBlocks;
+
+ {
+ for (unsigned i = 0; i < loop->size(); ++i)
+ loopInnerBlocks.add(loop->at(i));
+
+ if (!loopInnerBlocks.contains(load->owner))
+ return;
+
+ if (!loopInnerBlocks.contains(store->owner))
+ return;
+
+ SmallPtrSet<BasicBlock*> loopEntrances;
+ SmallPtrSet<BasicBlock*> loopExits;
+ for (auto* block : loopInnerBlocks) {
+ for (auto successor : block->successors()) {
+ if (!loopInnerBlocks.contains(successor.block()))
+ loopExits.add(successor.block());
+ }
+ for (auto* predecessor : block->predecessors()) {
+ if (!loopInnerBlocks.contains(predecessor))
+ loopEntrances.add(predecessor);
+ }
+ }
+
+ if (loopExits.size() != 1) {
+ if (verbose) {
+ dataLog("Loop has incorrect number of exits: ");
+ for (BasicBlock* block : loopExits)
+ dataLog(*block, " ");
+ dataLogLn();
+ }
+ return;
+ }
+ loopPostfooter = *loopExits.begin();
+
+ if (loopEntrances.size() != 1) {
+ if (verbose) {
+ dataLog("Loop has incorrect number of entrances: ");
+ for (BasicBlock* block : loopEntrances)
+ dataLog(*block, " ");
+ dataLogLn();
+ }
+ return;
+ }
+ loopPreheader = *loopEntrances.begin();
+
+ if (!loopHead->predecessors().contains(loopPreheader)) {
+ if (verbose)
+ dataLogLn("Loop head does not follow preheader");
+ return;
+ }
+
+ for (BasicBlock* predecessor : loopPostfooter->predecessors()) {
+ if (loopInnerBlocks.contains(predecessor)) {
+ // There is only one loop exit.
+ RELEASE_ASSERT(!loopFoot);
+ loopFoot = predecessor;
+ }
+ }
+ }
+
+ if (verbose) {
+ dataLog("Found loop head:", *loopHead, " foot:", *loopFoot, " prehead ", *loopPreheader, " postfoot ", *loopPostfooter, " inner blocks: ");
+ for (BasicBlock* block : loopInnerBlocks)
+ dataLog(*block, " ");
+ dataLogLn();
+ }
+
+ // Verify that the index is a monotonic inductive variable.
+ Value* startingIndex = nullptr;
+ {
+ if (m_phiChildren.at(source->index).size() != 2)
+ return;
+
+ Value* updateIndex = nullptr;
+ for (Value* up : m_phiChildren.at(source->index)) {
+ if (m_proc.dominators().dominates(up->owner, loopPreheader))
+ startingIndex = up;
+ else
+ updateIndex = up;
+ }
+
+ if (!updateIndex || !startingIndex || !loopInnerBlocks.contains(updateIndex->owner))
+ return;
+ patternMatchedValues.add(updateIndex);
+ patternMatchedValues.add(startingIndex);
+
+ updateIndex = updateIndex->child(0);
+ startingIndex = startingIndex->child(0);
+ if (updateIndex->opcode() != Add || !updateIndex->child(1)->isInt(1) || updateIndex->child(0) != source->index)
+ return;
+
+ if (!startingIndex->isInt(0))
+ return;
+ }
+
+ if (verbose)
+ dataLogLn("Found starting index:", *startingIndex);
+
+ // Identify the loop exit condition.
+ Value* exitCondition = nullptr;
+ // Is this an exit condition or a continuation condition?
+ bool conditionInverted = false;
+ // Do we check the index before or after using it?
+ bool checkBeforeWrite = false;
+ {
+ Value* branch = loopFoot->last();
+ if (branch->opcode() != Branch)
+ return;
+ patternMatchedValues.add(branch);
+ exitCondition = branch->child(0);
+
+ conditionInverted = loopPostfooter != loopFoot->successor(0).block();
+ checkBeforeWrite = m_proc.dominators().strictlyDominates(loopFoot, m_block);
+ }
+
+ if (verbose)
+ dataLogLn("Found exit condition: ", *exitCondition, " inverted: ", conditionInverted, " checks before the first write: ", checkBeforeWrite);
+
+ // Idenfity the loop bound by matching loop bound expressions.
+ Value* loopBound = nullptr;
+ {
+ auto extractLoopBound = [&] (Value* index, Value* bound) -> Value* {
+ // Match i+1 when we do a write first followed by the check for the next iteration
+ if (!checkBeforeWrite && index->opcode() == Add && index->child(0) == source->index && index->child(1)->isInt(1))
+ return bound;
+ return nullptr;
+ };
+
+ if (exitCondition->opcode() == GreaterThan && conditionInverted) {
+ // enter loop again if size > i
+ loopBound = extractLoopBound(exitCondition->child(1), exitCondition->child(0));
+ } else if (exitCondition->opcode() == LessThan && !conditionInverted) {
+ // enter loop again if size < i
+ loopBound = extractLoopBound(exitCondition->child(1), exitCondition->child(0));
+ }
+
+ if (!loopBound) {
+ if (verbose)
+ dataLogLn("Unable to extract bound from exit condition ", *exitCondition);
+ return;
+ }
+ }
+
+ // Make sure there are no other effectful things happening.
+ // This also guarantees that we found the only branch in the loop. This means that our
+ // load/store must happen iff our index update and branch do.
+ for (BasicBlock* block : loopInnerBlocks) {
+ for (unsigned index = 0; index < block->size(); ++index) {
+ Value* value = block->at(index);
+ if (patternMatchedValues.contains(value) || value->opcode() == Jump)
+ continue;
+
+ Effects effects = value->effects();
+ effects.readsLocalState = false;
+ if (effects != Effects::none()) {
+ if (verbose)
+ dataLogLn("Not byte copy loop, node ", *value, " has effects");
+ return;
+ }
+ }
+ }
+
+ if (!hoistValue(loopPreheader, source->arrayBase, false)
+ || !hoistValue(loopPreheader, destination->arrayBase, false)
+ || !hoistValue(loopPreheader, loopBound, false))
+ return;
+
+ if (verbose)
+ dataLogLn("Found byte copy loop: ", *source->arrayBase, "[", *source->index, "] = ", *destination->arrayBase, "[", *destination->index, "] from index ", *startingIndex, " to max index ", *loopBound, " stride: ", elemSize);
+
+ bool hoistResult = hoistValue(loopPreheader, source->arrayBase);
+ hoistResult &= hoistValue(loopPreheader, destination->arrayBase);
+ hoistResult &= hoistValue(loopPreheader, loopBound);
+ ASSERT_UNUSED(hoistResult, hoistResult);
+
+ auto origin = loopPostfooter->at(0)->origin();
+
+ BasicBlock* memcpy = m_blockInsertionSet.insertBefore(loopPostfooter, loopPostfooter->frequency());
+ memcpy->setSuccessors(FrequentedBlock(loopPostfooter));
+ memcpy->addPredecessor(loopPreheader);
+ for (BasicBlock* pred : loopPostfooter->predecessors())
+ loopPostfooter->removePredecessor(pred);
+ loopPostfooter->addPredecessor(memcpy);
+ loopPreheader->setSuccessors(memcpy);
+
+ Effects effects = Effects::forCall();
+ memcpy->appendNew<CCallValue>(m_proc, B3::Void, origin, effects,
+ memcpy->appendNew<ConstPtrValue>(m_proc, origin, tagCFunctionPtr<void*>(fastForwardCopy32, B3CCallPtrTag)),
+ destination->appendAddr(m_proc, memcpy, destination->arrayBase),
+ source->appendAddr(m_proc, memcpy, source->arrayBase),
+ loopBound);
+
+ memcpy->appendNew<Value>(m_proc, Jump, origin);
+ }
+
+ bool hoistValue(BasicBlock* into, Value* value, bool canMutate = true)
+ {
+ if (m_proc.dominators().dominates(value->owner, into))
+ return true;
+
+ Effects effects = value->effects();
+ if (effects != Effects::none()) {
+ if (verbose)
+ dataLogLn("Cannot hoist ", *value, " into ", *into, " since it has effects");
+ return false;
+ }
+
+ for (Value* child : value->children()) {
+ if (!hoistValue(into, child, false))
+ return false;
+ }
+
+ if (canMutate) {
+ for (Value* child : value->children())
+ hoistValue(into, child);
+
+ value->owner->at(value->index()) = m_proc.add<Value>(Nop, Void, value->origin());
+ into->appendNonTerminal(value);
+ }
+
+ return true;
+ }
+
+ bool run()
+ {
+ if (verbose)
+ dataLogLn("ReduceLoopStrength examining proc: ", m_proc);
+ bool result = false;
+
+ do {
+ m_changed = false;
+
+ for (BasicBlock* block : m_proc.blocksInPreOrder()) {
+ m_block = block;
+
+ for (m_index = 0; m_index < block->size(); ++m_index) {
+ m_value = m_block->at(m_index);
+ m_value->performSubstitution();
+ reduceByteCopyLoopsToMemcpy();
+ m_insertionSet.execute(m_block);
+ m_changed |= m_blockInsertionSet.execute();
+
+ if (m_changed) {
+ m_proc.resetReachability();
+ m_proc.invalidateCFG();
+ m_proc.resetValueOwners();
+ result = true;
+ break;
+ }
+ }
+
+ if (m_changed)
+ break;
+ }
+ } while (m_changed && m_proc.optLevel() >= 2);
+
+ if (result && verbose)
+ dataLogLn("ReduceLoopStrength produced proc: ", m_proc);
+
+ return result;
+ }
+#else
+ bool run()
+ {
+ return false;
+ }
+#endif
+
+ Procedure& m_proc;
+ InsertionSet m_insertionSet;
+ BlockInsertionSet m_blockInsertionSet;
+ BasicBlock* m_root { nullptr };
+ BasicBlock* m_block { nullptr };
+ unsigned m_index { 0 };
+ Value* m_value { nullptr };
+ bool m_changed { false };
+ PhiChildren m_phiChildren;
+};
+
+bool reduceLoopStrength(Procedure& proc)
+{
+ PhaseScope phaseScope(proc, "reduceLoopStrength");
+ return ReduceLoopStrength(proc).run();
+}
+
+} } // namespace JSC::B3
+
+#endif // ENABLE(B3_JIT)
Added: trunk/Source/_javascript_Core/b3/B3ReduceLoopStrength.h (0 => 248938)
--- trunk/Source/_javascript_Core/b3/B3ReduceLoopStrength.h (rev 0)
+++ trunk/Source/_javascript_Core/b3/B3ReduceLoopStrength.h 2019-08-21 06:30:05 UTC (rev 248938)
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2019 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#if ENABLE(B3_JIT)
+
+namespace JSC { namespace B3 {
+
+class Procedure;
+
+// Apply transformations that match and simplify entire loops.
+
+bool reduceLoopStrength(Procedure&);
+#if CPU(X86_64)
+JS_EXPORT_PRIVATE void fastForwardCopy32(uint32_t* dst, const uint32_t* src, size_t);
+#endif
+
+} } // namespace JSC::B3
+
+#endif // ENABLE(B3_JIT)
+
Modified: trunk/Source/_javascript_Core/b3/testb3.h (248937 => 248938)
--- trunk/Source/_javascript_Core/b3/testb3.h 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/b3/testb3.h 2019-08-21 06:30:05 UTC (rev 248938)
@@ -48,6 +48,7 @@
#include "B3MoveConstants.h"
#include "B3NativeTraits.h"
#include "B3Procedure.h"
+#include "B3ReduceLoopStrength.h"
#include "B3ReduceStrength.h"
#include "B3SlotBaseValue.h"
#include "B3StackSlot.h"
@@ -1016,6 +1017,13 @@
void addLoadTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>&);
void addTupleTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>&);
+void testFastForwardCopy32();
+void testByteCopyLoop();
+void testByteCopyLoopStartIsLoopDependent();
+void testByteCopyLoopBoundIsLoopDependent();
+
+void addCopyTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>&);
+
bool shouldRun(const char* filter, const char* testName);
#endif // ENABLE(B3_JIT)
Modified: trunk/Source/_javascript_Core/b3/testb3_1.cpp (248937 => 248938)
--- trunk/Source/_javascript_Core/b3/testb3_1.cpp 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/b3/testb3_1.cpp 2019-08-21 06:30:05 UTC (rev 248938)
@@ -538,6 +538,8 @@
addLoadTests(filter, tasks);
addTupleTests(filter, tasks);
+ addCopyTests(filter, tasks);
+
RUN(testSpillGP());
RUN(testSpillFP());
Modified: trunk/Source/_javascript_Core/b3/testb3_8.cpp (248937 => 248938)
--- trunk/Source/_javascript_Core/b3/testb3_8.cpp 2019-08-21 06:00:02 UTC (rev 248937)
+++ trunk/Source/_javascript_Core/b3/testb3_8.cpp 2019-08-21 06:30:05 UTC (rev 248938)
@@ -866,4 +866,234 @@
RUN(testLoad<uint16_t>(Load16Z, -1000000000));
}
+void testFastForwardCopy32()
+{
+#if CPU(X86_64)
+ for (const bool aligned : { true, false }) {
+ for (const bool overlap : { false, true }) {
+ for (size_t arrsize : { 1, 4, 5, 6, 8, 10, 12, 16, 20, 40, 100, 1000}) {
+ size_t overlapAmount = 5;
+
+ uint32_t* arr1, *arr2;
+
+ if (overlap) {
+ arr1 = new uint32_t[arrsize * 2];
+ arr2 = arr1 + (arrsize - overlapAmount);
+ } else {
+ arr1 = new uint32_t[arrsize];
+ arr2 = new uint32_t[arrsize];
+ }
+
+ if (!aligned && arrsize < 3)
+ continue;
+ if (overlap && arrsize <= overlapAmount + 3)
+ continue;
+
+ if (!aligned) {
+ ++arr1;
+ ++arr2;
+ arrsize -= 1;
+ overlapAmount -= 1;
+ }
+
+ for (size_t i = 0; i < arrsize; ++i)
+ arr1[i] = i;
+
+ fastForwardCopy32(arr2, arr1, arrsize);
+
+ if (overlap) {
+ for (size_t i = 0; i < arrsize - overlapAmount; ++i)
+ CHECK(arr2[i] == i);
+ for (size_t i = arrsize - overlapAmount; i < arrsize; ++i)
+ CHECK(arr2[i] == i - (arrsize - overlapAmount));
+ } else {
+ for (size_t i = 0; i < arrsize; ++i)
+ CHECK(arr2[i] == i);
+ }
+
+ if (!aligned) {
+ --arr1;
+ --arr2;
+ }
+
+ if (!overlap) {
+ delete[] arr1;
+ delete[] arr2;
+ } else
+ delete[] arr1;
+ }
+ }
+ }
+#endif
+}
+
+void testByteCopyLoop()
+{
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+ BasicBlock* head = proc.addBlock();
+ BasicBlock* update = proc.addBlock();
+ BasicBlock* continuation = proc.addBlock();
+
+ auto* arraySrc = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ auto* arrayDst = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1);
+ auto* arraySize = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR2);
+ auto* _one_ = root->appendNew<Const32Value>(proc, Origin(), 1);
+ auto* two = root->appendNew<Const32Value>(proc, Origin(), 2);
+ UpsilonValue* startingIndex = root->appendNew<UpsilonValue>(proc, Origin(), root->appendNew<Const32Value>(proc, Origin(), 0));
+ root->appendNew<Value>(proc, Jump, Origin());
+ root->setSuccessors(FrequentedBlock(head));
+
+ auto* index = head->appendNew<Value>(proc, Phi, Int32, Origin());
+ startingIndex->setPhi(index);
+ auto* loadIndex = head->appendNew<Value>(proc, Add, Origin(), arraySrc, head->appendNew<Value>(proc, Shl, Origin(), index, two));
+ auto* storeIndex = head->appendNew<Value>(proc, Add, Origin(), arrayDst, head->appendNew<Value>(proc, Shl, Origin(), index, two));
+ head->appendNew<MemoryValue>(proc, Store, Origin(), head->appendNew<MemoryValue>(proc, Load, Int32, Origin(), loadIndex), storeIndex);
+ auto* newIndex = head->appendNew<Value>(proc, Add, Origin(), index, one);
+ auto* cmpValue = head->appendNew<Value>(proc, GreaterThan, Origin(), newIndex, arraySize);
+ head->appendNew<Value>(proc, Branch, Origin(), cmpValue);
+ head->setSuccessors(FrequentedBlock(continuation), FrequentedBlock(update));
+
+ UpsilonValue* updateIndex = update->appendNew<UpsilonValue>(proc, Origin(), newIndex);
+ updateIndex->setPhi(index);
+ update->appendNew<Value>(proc, Jump, Origin());
+ update->setSuccessors(FrequentedBlock(head));
+
+ continuation->appendNewControlValue(proc, Return, Origin());
+
+ int* arr1 = new int[3];
+ int* arr2 = new int[3];
+
+ arr1[0] = 0;
+ arr1[1] = 0;
+ arr1[2] = 0;
+ arr2[0] = 1;
+ arr2[1] = 2;
+ arr2[2] = 3;
+
+ compileAndRun<void>(proc, arr2, arr1, 3);
+
+ CHECK_EQ(arr1[0], 1);
+ CHECK_EQ(arr1[1], 2);
+ CHECK_EQ(arr1[2], 3);
+
+ delete[] arr1;
+ delete [] arr2;
+}
+
+void testByteCopyLoopStartIsLoopDependent()
+{
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+ BasicBlock* head = proc.addBlock();
+ BasicBlock* update = proc.addBlock();
+ BasicBlock* continuation = proc.addBlock();
+
+ auto* arraySrc = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ auto* arrayDst = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1);
+ auto* arraySize = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR2);
+ auto* _one_ = root->appendNew<Const32Value>(proc, Origin(), 1);
+ auto* two = root->appendNew<Const32Value>(proc, Origin(), 2);
+ root->appendNew<Value>(proc, Jump, Origin());
+ root->setSuccessors(FrequentedBlock(head));
+
+ UpsilonValue* startingIndex = head->appendNew<UpsilonValue>(proc, Origin(), head->appendNew<Const32Value>(proc, Origin(), 0));
+ auto* index = head->appendNew<Value>(proc, Phi, Int32, Origin());
+ startingIndex->setPhi(index);
+ auto* loadIndex = head->appendNew<Value>(proc, Add, Origin(), arraySrc, head->appendNew<Value>(proc, Shl, Origin(), index, two));
+ auto* storeIndex = head->appendNew<Value>(proc, Add, Origin(), arrayDst, head->appendNew<Value>(proc, Shl, Origin(), index, two));
+ head->appendNew<MemoryValue>(proc, Store, Origin(), head->appendNew<MemoryValue>(proc, Load, Int32, Origin(), loadIndex), storeIndex);
+ auto* newIndex = head->appendNew<Value>(proc, Add, Origin(), index, one);
+ auto* cmpValue = head->appendNew<Value>(proc, GreaterThan, Origin(), newIndex, arraySize);
+ head->appendNew<Value>(proc, Branch, Origin(), cmpValue);
+ head->setSuccessors(FrequentedBlock(continuation), FrequentedBlock(update));
+
+ UpsilonValue* updateIndex = update->appendNew<UpsilonValue>(proc, Origin(), newIndex);
+ updateIndex->setPhi(index);
+ update->appendNew<Value>(proc, Jump, Origin());
+ update->setSuccessors(FrequentedBlock(head));
+
+ continuation->appendNewControlValue(proc, Return, Origin());
+
+ int* arr1 = new int[3];
+ int* arr2 = new int[3];
+
+ arr1[0] = 0;
+ arr1[1] = 0;
+ arr1[2] = 0;
+ arr2[0] = 1;
+ arr2[1] = 2;
+ arr2[2] = 3;
+
+ compileAndRun<void>(proc, arr2, arr1, 0);
+
+ CHECK_EQ(arr1[0], 1);
+ CHECK_EQ(arr1[1], 0);
+ CHECK_EQ(arr1[2], 0);
+
+ delete[] arr1;
+ delete [] arr2;
+}
+
+void testByteCopyLoopBoundIsLoopDependent()
+{
+ Procedure proc;
+ BasicBlock* root = proc.addBlock();
+ BasicBlock* head = proc.addBlock();
+ BasicBlock* update = proc.addBlock();
+ BasicBlock* continuation = proc.addBlock();
+
+ auto* arraySrc = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0);
+ auto* arrayDst = root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1);
+ auto* _one_ = root->appendNew<Const32Value>(proc, Origin(), 1);
+ auto* two = root->appendNew<Const32Value>(proc, Origin(), 2);
+ UpsilonValue* startingIndex = root->appendNew<UpsilonValue>(proc, Origin(), root->appendNew<Const32Value>(proc, Origin(), 0));
+ root->appendNew<Value>(proc, Jump, Origin());
+ root->setSuccessors(FrequentedBlock(head));
+
+ auto* index = head->appendNew<Value>(proc, Phi, Int32, Origin());
+ startingIndex->setPhi(index);
+ auto* loadIndex = head->appendNew<Value>(proc, Add, Origin(), arraySrc, head->appendNew<Value>(proc, Shl, Origin(), index, two));
+ auto* storeIndex = head->appendNew<Value>(proc, Add, Origin(), arrayDst, head->appendNew<Value>(proc, Shl, Origin(), index, two));
+ head->appendNew<MemoryValue>(proc, Store, Origin(), head->appendNew<MemoryValue>(proc, Load, Int32, Origin(), loadIndex), storeIndex);
+ auto* newIndex = head->appendNew<Value>(proc, Add, Origin(), index, one);
+ auto* cmpValue = head->appendNew<Value>(proc, GreaterThan, Origin(), newIndex, index);
+ head->appendNew<Value>(proc, Branch, Origin(), cmpValue);
+ head->setSuccessors(FrequentedBlock(continuation), FrequentedBlock(update));
+
+ UpsilonValue* updateIndex = update->appendNew<UpsilonValue>(proc, Origin(), newIndex);
+ updateIndex->setPhi(index);
+ update->appendNew<Value>(proc, Jump, Origin());
+ update->setSuccessors(FrequentedBlock(head));
+
+ continuation->appendNewControlValue(proc, Return, Origin());
+
+ int* arr1 = new int[3];
+ int* arr2 = new int[3];
+
+ arr1[0] = 0;
+ arr1[1] = 0;
+ arr1[2] = 0;
+ arr2[0] = 1;
+ arr2[1] = 2;
+ arr2[2] = 3;
+
+ compileAndRun<void>(proc, arr2, arr1, 3);
+
+ CHECK_EQ(arr1[0], 1);
+ CHECK_EQ(arr1[1], 0);
+ CHECK_EQ(arr1[2], 0);
+
+ delete[] arr1;
+ delete [] arr2;
+}
+
+void addCopyTests(const char* filter, Deque<RefPtr<SharedTask<void()>>>& tasks)
+{
+ RUN(testFastForwardCopy32());
+ RUN(testByteCopyLoop());
+ RUN(testByteCopyLoopStartIsLoopDependent());
+ RUN(testByteCopyLoopBoundIsLoopDependent());
+}
+
#endif // ENABLE(B3_JIT)