Hi, thank you:) I have OCA signed and processed.
I have changed Integer.reverseBytes, and added two small checks to BitTwiddle tests for Integer and Long, patch is attached. Jaroslav 2016-04-29 14:33 GMT+02:00 Claes Redestad <claes.redes...@oracle.com>: > Hi, > > > On 2016-04-29 13:36, Jaroslav Kameník wrote: > >> Hello! >> >> I have a small patch to Integer and Long classes, which is speeding up bit >> reversion significantly. >> >> Last two/three steps of bit reversion are doing byte reversion, so there >> is >> possibility to use >> intrinsified method reverseBytes. Java implementation of reverseBytes is >> similar to those >> steps, so it should give similar performance when intrinsics are not >> available. Here I have >> result of few performance tests (thank for hints, Aleksej:) : >> >> >> old code: >> >> # VM options: -server >> ReverseInt.reverse 1 avgt 5 8,766 ± 0,214 ns/op >> ReverseLong.reverse 1 avgt 5 9,992 ± 0,165 ns/op >> >> # VM options: -client >> ReverseInt.reverse 1 avgt 5 9,168 ± 0,268 ns/op >> ReverseLong.reverse 1 avgt 5 9,988 ± 0,123 ns/op >> >> >> patched: >> >> # VM options: -server >> ReverseInt.reverse 1 avgt 5 6,411 ± 0,046 ns/op >> ReverseLong.reverse 1 avgt 5 6,299 ± 0,158 ns/op >> >> # VM options: -client >> ReverseInt.reverse 1 avgt 5 6,430 ± 0,022 ns/op >> ReverseLong.reverse 1 avgt 5 6,301 ± 0,097 ns/op >> >> >> patched, intrinsics disabled: >> >> # VM options: -server -XX:DisableIntrinsic=_reverseBytes_i,_reverseBytes_l >> ReverseInt.reverse 1 avgt 5 9,597 ± 0,206 ns/op >> ReverseLong.reverse 1 avgt 5 9,966 ± 0,151 ns/op >> >> # VM options: -client -XX:DisableIntrinsic=_reverseBytes_i,_reverseBytes_l >> ReverseInt.reverse 1 avgt 5 9,609 ± 0,069 ns/op >> ReverseLong.reverse 1 avgt 5 9,968 ± 0,075 ns/op >> >> >> >> You can see, there is little slowdown in integer case with intrinsics >> disabled. >> It seems to be caused by different 'shape' of byte reverting code in >> Integer.reverse and Integer.reverseByte. I tried to replace reverseByte >> code >> with piece of reverse method, and it is as fast as not patched case: >> >> >> ReverseInt.reverse 1 avgt 5 9,184 ± 0,255 ns/op >> >> >> Diffs from jdk9: >> >> Integer.java: >> >> @@ -1779,9 +1805,8 @@ >> i = (i & 0x55555555) << 1 | (i >>> 1) & 0x55555555; >> i = (i & 0x33333333) << 2 | (i >>> 2) & 0x33333333; >> i = (i & 0x0f0f0f0f) << 4 | (i >>> 4) & 0x0f0f0f0f; >> - i = (i << 24) | ((i & 0xff00) << 8) | >> - ((i >>> 8) & 0xff00) | (i >>> 24); >> - return i; >> + >> + return reverseBytes(i); >> >> Long.java: >> >> @@ -1940,10 +1997,8 @@ >> i = (i & 0x5555555555555555L) << 1 | (i >>> 1) & >> 0x5555555555555555L; >> i = (i & 0x3333333333333333L) << 2 | (i >>> 2) & >> 0x3333333333333333L; >> i = (i & 0x0f0f0f0f0f0f0f0fL) << 4 | (i >>> 4) & >> 0x0f0f0f0f0f0f0f0fL; >> - i = (i & 0x00ff00ff00ff00ffL) << 8 | (i >>> 8) & >> 0x00ff00ff00ff00ffL; >> - i = (i << 48) | ((i & 0xffff0000L) << 16) | >> - ((i >>> 16) & 0xffff0000L) | (i >>> 48); >> - return i; >> + >> + return reverseBytes(i); >> > > reduces code duplication, improves performance... what's the catch!? :-) > > Updating Integer.reverseByte to have the same 'shape' as in > Integer.reverseByte seems reasonable in this case. > > I can sponsor this if you've signed the OCA etc. > > Thanks! > > /Claes >
--- old/src/java.base/share/classes/java/lang/Integer.java 2016-04-29 17:52:22.407574334 +0200 +++ new/src/java.base/share/classes/java/lang/Integer.java 2016-04-29 17:52:22.295575132 +0200 @@ -1790,9 +1790,8 @@ i = (i & 0x55555555) << 1 | (i >>> 1) & 0x55555555; i = (i & 0x33333333) << 2 | (i >>> 2) & 0x33333333; i = (i & 0x0f0f0f0f) << 4 | (i >>> 4) & 0x0f0f0f0f; - i = (i << 24) | ((i & 0xff00) << 8) | - ((i >>> 8) & 0xff00) | (i >>> 24); - return i; + + return reverseBytes(i); } /** @@ -1820,10 +1819,10 @@ */ @HotSpotIntrinsicCandidate public static int reverseBytes(int i) { - return ((i >>> 24) ) | - ((i >> 8) & 0xFF00) | - ((i << 8) & 0xFF0000) | - ((i << 24)); + return (i << 24) | + ((i & 0xff00) << 8) | + ((i >>> 8) & 0xff00) | + (i >>> 24); } /** --- old/src/java.base/share/classes/java/lang/Long.java 2016-04-29 17:52:22.667572486 +0200 +++ new/src/java.base/share/classes/java/lang/Long.java 2016-04-29 17:52:22.555573282 +0200 @@ -1952,10 +1952,8 @@ i = (i & 0x5555555555555555L) << 1 | (i >>> 1) & 0x5555555555555555L; i = (i & 0x3333333333333333L) << 2 | (i >>> 2) & 0x3333333333333333L; i = (i & 0x0f0f0f0f0f0f0f0fL) << 4 | (i >>> 4) & 0x0f0f0f0f0f0f0f0fL; - i = (i & 0x00ff00ff00ff00ffL) << 8 | (i >>> 8) & 0x00ff00ff00ff00ffL; - i = (i << 48) | ((i & 0xffff0000L) << 16) | - ((i >>> 16) & 0xffff0000L) | (i >>> 48); - return i; + + return reverseBytes(i); } /** --- old/test/java/lang/Integer/BitTwiddle.java 2016-04-29 17:52:22.931570609 +0200 +++ new/test/java/lang/Integer/BitTwiddle.java 2016-04-29 17:52:22.815571433 +0200 @@ -135,5 +135,9 @@ if (bitCount(x) != bitCount(reverseBytes(x))) throw new RuntimeException("z: " + toHexString(x)); } + + if(reverse(0b0001_0010_0011_0100_0101_0110_0111_1001) != + 0b1001_1110_0110_1010_0010_1100_0100_1000) + throw new RuntimeException("reverse"); } } --- old/test/java/lang/Long/BitTwiddle.java 2016-04-29 17:52:23.195568732 +0200 +++ new/test/java/lang/Long/BitTwiddle.java 2016-04-29 17:52:23.083569529 +0200 @@ -135,5 +135,9 @@ if (bitCount(x) != bitCount(reverseBytes(x))) throw new RuntimeException("z: " + toHexString(x)); } + + if(reverse(0b0001_1111_0010_1110_0011_1101_0100_1100_0101_1011_0110_1010_1000_1001_1000_0111L) != + 0b1110_0001_1001_0001_0101_0110_1101_1010_0011_0010_1011_1100_0111_0100_1111_1000L) + throw new RuntimeException("reverse"); } }