Title: [285197] trunk/Source/JavaScriptCore
Revision: 285197
Author: ross.kirsl...@sony.com
Date: 2021-11-02 20:23:51 -0700 (Tue, 02 Nov 2021)

Log Message

[JSC] Add LLInt fast path for OpMod on x86_64
https://bugs.webkit.org/show_bug.cgi?id=232644

Reviewed by Saam Barati and Yusuke Suzuki.

This patch ports the x86_64 fast path for OpMod from the baseline JIT to LLInt.

The fast path is quite similar to OpDiv's, but the implementation avoids using binaryOpCustomStore
because OpMod is not a ProfiledBinaryOp.
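
The guard conditions are essentially OpDiv's, adapted for the remainder. As a point of
reference, here is a minimal standalone sketch of when the fast path has to give up
(illustrative C++, not JSC code; the helper name is made up):

    #include <cstdint>
    #include <limits>
    #include <optional>

    // Illustrative only: when an int32 `lhs % rhs` cannot be handled on the
    // fast path and must fall back to slow_path_mod.
    std::optional<int32_t> int32ModFastPath(int32_t lhs, int32_t rhs)
    {
        if (!rhs)
            return std::nullopt; // x % 0 is NaN, not an int32.
        if (lhs == std::numeric_limits<int32_t>::min() && rhs == -1)
            return std::nullopt; // idiv would fault on INT32_MIN / -1.
        int32_t remainder = lhs % rhs;
        if (lhs < 0 && !remainder)
            return std::nullopt; // The JS result is -0, which is not an int32.
        return remainder;        // Otherwise the int32 remainder is the result.
    }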

The performance impact appears negligible with all JIT tiers enabled;
results for the relevant microbenchmarks with the JIT off are as follows.

                                            Before                    After

int-or-other-mod-then-get-by-val      158.0136+-0.9338     ^     39.7698+-0.4394        ^ definitely 3.9732x faster
integer-modulo                         15.1972+-0.4197     ^      7.1461+-0.1530        ^ definitely 2.1266x faster
mod-boolean                           145.4011+-2.0483     ?    146.1243+-1.6816        ?
mod-boolean-double                    145.6148+-1.8530     ?    145.9380+-1.7073        ?
mod-untyped                           286.9585+-3.9535          284.0360+-4.1221          might be 1.0103x faster
negative-zero-modulo                    1.2951+-0.1275     ^      1.0220+-0.0412        ^ definitely 1.2672x faster

<geometric>                            51.5408+-0.8164     ^     34.7341+-0.3365        ^ definitely 1.4839x faster

* jit/GPRInfo.h: Add assertions.
* llint/LowLevelInterpreter.asm:
* llint/LowLevelInterpreter32_64.asm:
* llint/LowLevelInterpreter64.asm:

Modified Paths

trunk/Source/JavaScriptCore/ChangeLog
trunk/Source/JavaScriptCore/jit/GPRInfo.h
trunk/Source/JavaScriptCore/llint/LowLevelInterpreter.asm
trunk/Source/JavaScriptCore/llint/LowLevelInterpreter32_64.asm
trunk/Source/JavaScriptCore/llint/LowLevelInterpreter64.asm

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (285196 => 285197)


--- trunk/Source/JavaScriptCore/ChangeLog	2021-11-03 03:08:11 UTC (rev 285196)
+++ trunk/Source/JavaScriptCore/ChangeLog	2021-11-03 03:23:51 UTC (rev 285197)
@@ -1,3 +1,34 @@
+2021-11-02  Ross Kirsling  <ross.kirsl...@sony.com>
+
+        [JSC] Add LLInt fast path for OpMod on x86_64
+        https://bugs.webkit.org/show_bug.cgi?id=232644
+
+        Reviewed by Saam Barati and Yusuke Suzuki.
+
+        This patch ports the x86_64 fast path for OpMod from the baseline JIT to LLInt.
+
+        The fast path is quite similar to OpDiv's, but the implementation avoids using binaryOpCustomStore
+        because OpMod is not a ProfiledBinaryOp.
+
+        The performance impact appears negligible with all JIT tiers enabled;
+        results for the relevant microbenchmarks with the JIT off are as follows.
+
+                                                    Before                    After
+
+        int-or-other-mod-then-get-by-val      158.0136+-0.9338     ^     39.7698+-0.4394        ^ definitely 3.9732x faster
+        integer-modulo                         15.1972+-0.4197     ^      7.1461+-0.1530        ^ definitely 2.1266x faster
+        mod-boolean                           145.4011+-2.0483     ?    146.1243+-1.6816        ?
+        mod-boolean-double                    145.6148+-1.8530     ?    145.9380+-1.7073        ?
+        mod-untyped                           286.9585+-3.9535          284.0360+-4.1221          might be 1.0103x faster
+        negative-zero-modulo                    1.2951+-0.1275     ^      1.0220+-0.0412        ^ definitely 1.2672x faster
+
+        <geometric>                            51.5408+-0.8164     ^     34.7341+-0.3365        ^ definitely 1.4839x faster
+
+        * jit/GPRInfo.h: Add assertions.
+        * llint/LowLevelInterpreter.asm:
+        * llint/LowLevelInterpreter32_64.asm:
+        * llint/LowLevelInterpreter64.asm:
+
 2021-11-02  Don Olmstead  <don.olmst...@sony.com>
 
         Non-unified build fixes early November 2021 edition

Modified: trunk/Source/JavaScriptCore/jit/GPRInfo.h (285196 => 285197)


--- trunk/Source/JavaScriptCore/jit/GPRInfo.h	2021-11-03 03:08:11 UTC (rev 285196)
+++ trunk/Source/JavaScriptCore/jit/GPRInfo.h	2021-11-03 03:23:51 UTC (rev 285197)
@@ -544,6 +544,9 @@
     static constexpr unsigned InvalidIndex = 0xffffffff;
 };
 
+static_assert(GPRInfo::regT0 == X86Registers::eax);
+static_assert(GPRInfo::returnValueGPR2 == X86Registers::edx);
+
 #endif // CPU(X86_64)
 
 #if CPU(ARM_THUMB2)
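
These asserts document the register contract the new LLInt fast path depends on: x86-64 idiv
implicitly takes its dividend in edx:eax and leaves the quotient in eax and the remainder in
edx, which is what the fast path's use of cdqi/idivi and r1 assumes. A standalone illustration
of that contract (not JSC code; plain GCC-style inline asm):

    #include <cstdint>
    #include <cstdio>

    // idiv reads its dividend from edx:eax and writes the quotient to eax and
    // the remainder to edx; "cltd" is the AT&T spelling of cdq (offlineasm's
    // cdqi), which sign-extends eax into edx.
    static int32_t modViaIdiv(int32_t lhs, int32_t rhs)
    {
        int32_t quotient, remainder;
        asm("cltd\n\t"
            "idivl %[rhs]"
            : "=a"(quotient), "=d"(remainder)
            : "a"(lhs), [rhs] "c"(rhs)
            : "cc");
        (void)quotient;   // op_mod only needs the remainder.
        return remainder; // This is the value op_mod boxes and returns.
    }

    int main()
    {
        std::printf("%d\n", modViaIdiv(7, 3)); // prints 1
    }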

Modified: trunk/Source/JavaScriptCore/llint/LowLevelInterpreter.asm (285196 => 285197)


--- trunk/Source/JavaScriptCore/llint/LowLevelInterpreter.asm	2021-11-03 03:08:11 UTC (rev 285196)
+++ trunk/Source/JavaScriptCore/llint/LowLevelInterpreter.asm	2021-11-03 03:23:51 UTC (rev 285197)
@@ -2098,7 +2098,6 @@
 
 slowPathOp(is_callable)
 slowPathOp(is_constructor)
-slowPathOp(mod)
 slowPathOp(new_array_buffer)
 slowPathOp(new_array_with_spread)
 slowPathOp(pow)

Modified: trunk/Source/JavaScriptCore/llint/LowLevelInterpreter32_64.asm (285196 => 285197)


--- trunk/Source/JavaScriptCore/llint/LowLevelInterpreter32_64.asm	2021-11-03 03:08:11 UTC (rev 285196)
+++ trunk/Source/JavaScriptCore/llint/LowLevelInterpreter32_64.asm	2021-11-03 03:23:51 UTC (rev 285197)
@@ -3092,3 +3092,4 @@
 slowPathOp(enumerator_get_by_val)
 slowPathOp(enumerator_in_by_val)
 slowPathOp(enumerator_has_own_property)
+slowPathOp(mod)

Modified: trunk/Source/JavaScriptCore/llint/LowLevelInterpreter64.asm (285196 => 285197)


--- trunk/Source/JavaScriptCore/llint/LowLevelInterpreter64.asm	2021-11-03 03:08:11 UTC (rev 285196)
+++ trunk/Source/JavaScriptCore/llint/LowLevelInterpreter64.asm	2021-11-03 03:23:51 UTC (rev 285197)
@@ -1230,14 +1230,13 @@
         macro (lhs, rhs, slow, index)
             # Assume t3 is scratchable.
             btiz rhs, slow
-            bineq rhs, -1, .notNeg2TwoThe31DivByNeg1
+            bineq rhs, -1, .notNeg2ToThe31DivByNeg1
             bieq lhs, -2147483648, slow
-        .notNeg2TwoThe31DivByNeg1:
+        .notNeg2ToThe31DivByNeg1:
             btinz lhs, .intOK
             bilt rhs, 0, slow
         .intOK:
             move rhs, t3
-            move lhs, t0
             cdqi
             idivi t3
             btinz t1, slow
@@ -1284,7 +1283,40 @@
     macro (lhs, rhs, slow) bsubio rhs, lhs, slow end,
     macro (lhs, rhs) subd rhs, lhs end)
 
+if X86_64 or X86_64_WIN
+    llintOpWithReturn(op_mod, OpMod, macro (size, get, dispatch, return)
+        get(m_rhs, t0)
+        get(m_lhs, t2)
+        loadConstantOrVariableInt32(size, t0, t1, .slow)
+        loadConstantOrVariableInt32(size, t2, t0, .slow)
 
+        # Assume t3 is scratchable.
+        # r1 is always edx (even on Windows).
+        btiz t1, .slow
+        bineq t1, -1, .notNeg2ToThe31ModByNeg1
+        bieq t0, -2147483648, .slow
+    .notNeg2ToThe31ModByNeg1:
+        move t1, t3
+        bilt t0, 0, .needsNegZeroCheck
+        cdqi
+        idivi t3
+        orq numberTag, r1
+        return(r1)
+    .needsNegZeroCheck:
+        cdqi
+        idivi t3
+        btiz r1, .slow
+        orq numberTag, r1
+        return(r1)
+
+    .slow:
+        callSlowPath(_slow_path_mod)
+        dispatch()
+    end)
+else
+    slowPathOp(mod)
+end
+
 llintOpWithReturn(op_unsigned, OpUnsigned, macro (size, get, dispatch, return)
     get(m_operand, t1)
     loadConstantOrVariable(size, t1, t2)