Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault merged PR #5576: URL: https://github.com/apache/hbase/pull/5576 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1439917332 ## hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java: ## @@ -468,38 +468,75 @@ public static void writeVLong(ByteBuffer out, long i) { } } - private interface ByteVisitor { -byte get(); - } - - private static long readVLong(ByteVisitor visitor) { -byte firstByte = visitor.get(); + /** + * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a + * {@link ByteBuff}. + */ + public static long readVLong(ByteBuff buf) { +byte firstByte = buf.get(); int len = WritableUtils.decodeVIntSize(firstByte); if (len == 1) { return firstByte; +} else { + int remaining = len - 1; + long i = 0; + int offsetFromPos = 0; + if (remaining >= Bytes.SIZEOF_INT) { +// The int read has to be converted to unsigned long so the & op +i = (buf.getIntAfterPosition(offsetFromPos) & 0xL); +remaining -= Bytes.SIZEOF_INT; +offsetFromPos += Bytes.SIZEOF_INT; + } + if (remaining >= Bytes.SIZEOF_SHORT) { +short s = buf.getShortAfterPosition(offsetFromPos); +i = i << 16; +i = i | (s & 0x); +remaining -= Bytes.SIZEOF_SHORT; +offsetFromPos += Bytes.SIZEOF_SHORT; + } + for (int idx = 0; idx < remaining; idx++) { +byte b = buf.getByteAfterPosition(offsetFromPos + idx); +i = i << 8; +i = i | (b & 0xFF); + } + buf.skip(len - 1); + return WritableUtils.isNegativeVInt(firstByte) ? ~i : i; } -long i = 0; -for (int idx = 0; idx < len - 1; idx++) { - byte b = visitor.get(); - i = i << 8; - i = i | (b & 0xFF); -} -return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); } /** * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a {@link ByteBuffer}. */ - public static long readVLong(ByteBuffer in) { -return readVLong(in::get); - } - - /** - * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a - * {@link ByteBuff}. - */ - public static long readVLong(ByteBuff in) { -return readVLong(in::get); + public static long readVLong(ByteBuffer buf) { +byte firstByte = buf.get(); +int len = WritableUtils.decodeVIntSize(firstByte); +if (len == 1) { + return firstByte; +} else { + int remaining = len - 1; + long i = 0; + int offsetFromPos = 0; + if (remaining >= Bytes.SIZEOF_INT) { +// The int read has to be converted to unsigned long so the & op +i = (buf.getInt(buf.position() + offsetFromPos) & 0xL); Review Comment: > This is needed as the original implementation from HBASE-14186 has a small bug which was fixed by HBASE-16624. Ah ok thanks > This is the implementation for Java NIO ByteBuffer's which AFAIK don't have the same getIntAfterPosition as HBase's ByteBuff class Whoops, I missed that distinction and see you are using the getAfterPosition in the other method above. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1439908082 ## hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java: ## @@ -468,38 +468,75 @@ public static void writeVLong(ByteBuffer out, long i) { } } - private interface ByteVisitor { -byte get(); - } - - private static long readVLong(ByteVisitor visitor) { -byte firstByte = visitor.get(); + /** + * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a + * {@link ByteBuff}. + */ + public static long readVLong(ByteBuff buf) { +byte firstByte = buf.get(); int len = WritableUtils.decodeVIntSize(firstByte); if (len == 1) { return firstByte; +} else { + int remaining = len - 1; + long i = 0; + int offsetFromPos = 0; + if (remaining >= Bytes.SIZEOF_INT) { +// The int read has to be converted to unsigned long so the & op +i = (buf.getIntAfterPosition(offsetFromPos) & 0xL); +remaining -= Bytes.SIZEOF_INT; +offsetFromPos += Bytes.SIZEOF_INT; + } + if (remaining >= Bytes.SIZEOF_SHORT) { +short s = buf.getShortAfterPosition(offsetFromPos); +i = i << 16; +i = i | (s & 0x); +remaining -= Bytes.SIZEOF_SHORT; +offsetFromPos += Bytes.SIZEOF_SHORT; + } + for (int idx = 0; idx < remaining; idx++) { +byte b = buf.getByteAfterPosition(offsetFromPos + idx); +i = i << 8; +i = i | (b & 0xFF); + } + buf.skip(len - 1); + return WritableUtils.isNegativeVInt(firstByte) ? ~i : i; } -long i = 0; -for (int idx = 0; idx < len - 1; idx++) { - byte b = visitor.get(); - i = i << 8; - i = i | (b & 0xFF); -} -return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); } /** * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a {@link ByteBuffer}. */ - public static long readVLong(ByteBuffer in) { -return readVLong(in::get); - } - - /** - * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a - * {@link ByteBuff}. - */ - public static long readVLong(ByteBuff in) { -return readVLong(in::get); + public static long readVLong(ByteBuffer buf) { +byte firstByte = buf.get(); +int len = WritableUtils.decodeVIntSize(firstByte); +if (len == 1) { + return firstByte; +} else { + int remaining = len - 1; + long i = 0; + int offsetFromPos = 0; + if (remaining >= Bytes.SIZEOF_INT) { +// The int read has to be converted to unsigned long so the & op +i = (buf.getInt(buf.position() + offsetFromPos) & 0xL); Review Comment: > why do we need to do this & here when the original impl in [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186) did not? This is needed as the original implementation from [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186) has a small bug which was fixed by [HBASE-16624](https://issues.apache.org/jira/browse/HBASE-16624). > Also fwiw, you can use getIntAfterPosition() (same for getShortAfterPosition() and getByteAfterPosition() below), to avoid the duplication of calling buf.position() + offset in each of these. This will also have a minor perf benefit (until [HBASE-27730](https://issues.apache.org/jira/browse/HBASE-27730) is implemented) because it doesn't incur an extra checkRefCount() call). This is the implementation for Java NIO ByteBuffer's which AFAIK don't have the same `getIntAfterPosition` as HBase's ByteBuff class. These ports are pretty direct translations from HBase ByteBuff -> Java ByteBuffer. Since these are Java ByteBuffer's there shouldn't be any performance penalty for calling `position()` since it's not gated by a RefCnt check -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1439753125 ## hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java: ## @@ -468,38 +468,75 @@ public static void writeVLong(ByteBuffer out, long i) { } } - private interface ByteVisitor { -byte get(); - } - - private static long readVLong(ByteVisitor visitor) { -byte firstByte = visitor.get(); + /** + * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a + * {@link ByteBuff}. + */ + public static long readVLong(ByteBuff buf) { +byte firstByte = buf.get(); int len = WritableUtils.decodeVIntSize(firstByte); if (len == 1) { return firstByte; +} else { + int remaining = len - 1; + long i = 0; + int offsetFromPos = 0; + if (remaining >= Bytes.SIZEOF_INT) { +// The int read has to be converted to unsigned long so the & op +i = (buf.getIntAfterPosition(offsetFromPos) & 0xL); +remaining -= Bytes.SIZEOF_INT; +offsetFromPos += Bytes.SIZEOF_INT; + } + if (remaining >= Bytes.SIZEOF_SHORT) { +short s = buf.getShortAfterPosition(offsetFromPos); +i = i << 16; +i = i | (s & 0x); +remaining -= Bytes.SIZEOF_SHORT; +offsetFromPos += Bytes.SIZEOF_SHORT; + } + for (int idx = 0; idx < remaining; idx++) { +byte b = buf.getByteAfterPosition(offsetFromPos + idx); +i = i << 8; +i = i | (b & 0xFF); + } + buf.skip(len - 1); + return WritableUtils.isNegativeVInt(firstByte) ? ~i : i; } -long i = 0; -for (int idx = 0; idx < len - 1; idx++) { - byte b = visitor.get(); - i = i << 8; - i = i | (b & 0xFF); -} -return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); } /** * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a {@link ByteBuffer}. */ - public static long readVLong(ByteBuffer in) { -return readVLong(in::get); - } - - /** - * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a - * {@link ByteBuff}. - */ - public static long readVLong(ByteBuff in) { -return readVLong(in::get); + public static long readVLong(ByteBuffer buf) { +byte firstByte = buf.get(); +int len = WritableUtils.decodeVIntSize(firstByte); +if (len == 1) { + return firstByte; +} else { + int remaining = len - 1; + long i = 0; + int offsetFromPos = 0; + if (remaining >= Bytes.SIZEOF_INT) { +// The int read has to be converted to unsigned long so the & op +i = (buf.getInt(buf.position() + offsetFromPos) & 0xL); Review Comment: why do we need to do this `&` here when the original impl in HBASE-14186 did not? Also fwiw, you can use `getIntAfterPosition()` (same for `getShortAfterPosition()` and `getByteAfterPosition()` below), to avoid the duplication of calling `buf.position() + offset` in each of these. This will also have a minor perf benefit (until HBASE-27730 is implemented) because it doesn't incur an extra checkRefCount() call). That might actually account for some of the perf difference between this and readVLongTimestamp at the larger values. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-187201 Updated benchmarks can be found in this [JIRA comment](https://issues.apache.org/jira/browse/HBASE-28256?focusedCommentId=17799506=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17799506). The [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186) approach is now much closer in performance to the readVLongTimestamp approach after fixing the bug. I think that the numbers generally show that the [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186) is at least as good if not better for almost all vLong sizes than our existing approach. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1866486740 The approach that I benchmarked from [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186) (and ultimately updated this PR to use) had a bug which I fixed in https://github.com/apache/hbase/pull/5576/commits/efcb0667e316fc5b44cc113c23e163eebb2e1cc3. Fixing the bug required making the method potentially slower, so I'm going to rerun the benchmarks for the fixed implementation. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865552399 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 34s | Docker mode activated. | ||| _ Prechecks _ | | +1 :green_heart: | dupname | 0m 0s | No case conflicting files found. | | +1 :green_heart: | hbaseanti | 0m 0s | Patch does not have any anti-patterns. | | +1 :green_heart: | @author | 0m 0s | The patch does not contain any @author tags. | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 40s | master passed | | +1 :green_heart: | compile | 0m 33s | master passed | | +1 :green_heart: | checkstyle | 0m 15s | master passed | | +1 :green_heart: | spotless | 0m 41s | branch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 30s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 39s | the patch passed | | +1 :green_heart: | compile | 0m 33s | the patch passed | | +1 :green_heart: | javac | 0m 33s | the patch passed | | +1 :green_heart: | checkstyle | 0m 16s | the patch passed | | +1 :green_heart: | whitespace | 0m 0s | The patch has no whitespace issues. | | +1 :green_heart: | hadoopcheck | 9m 25s | Patch does not cause any errors with Hadoop 3.2.4 3.3.6. | | +1 :green_heart: | spotless | 0m 43s | patch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 39s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | asflicense | 0m 9s | The patch does not generate ASF License warnings. | | | | 25m 18s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/artifact/yetus-general-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | dupname asflicense javac spotbugs hadoopcheck hbaseanti spotless checkstyle compile | | uname | Linux dc21ebe1c0ff 5.4.0-163-generic #180-Ubuntu SMP Tue Sep 5 13:21:23 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Max. process+thread count | 79 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/console | | versions | git=2.34.1 maven=3.8.6 spotbugs=4.7.3 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865542255 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 47s | Docker mode activated. | | -0 :warning: | yetus | 0m 3s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 55s | master passed | | +1 :green_heart: | compile | 0m 19s | master passed | | +1 :green_heart: | shadedjars | 4m 58s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 16s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 43s | the patch passed | | +1 :green_heart: | compile | 0m 17s | the patch passed | | +1 :green_heart: | javac | 0m 17s | the patch passed | | +1 :green_heart: | shadedjars | 4m 56s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 16s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 2m 41s | hbase-common in the patch passed. | | | | 21m 7s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/artifact/yetus-jdk11-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux 2ee0af8dcbaf 5.4.0-163-generic #180-Ubuntu SMP Tue Sep 5 13:21:23 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/testReport/ | | Max. process+thread count | 373 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865536893 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 25s | Docker mode activated. | | -0 :warning: | yetus | 0m 2s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 14s | master passed | | +1 :green_heart: | compile | 0m 14s | master passed | | +1 :green_heart: | shadedjars | 5m 3s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 12s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 16s | the patch passed | | +1 :green_heart: | compile | 0m 15s | the patch passed | | +1 :green_heart: | javac | 0m 15s | the patch passed | | +1 :green_heart: | shadedjars | 5m 1s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 12s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 1m 48s | hbase-common in the patch passed. | | | | 18m 38s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/artifact/yetus-jdk8-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux fc15cf211260 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Temurin-1.8.0_352-b08 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/testReport/ | | Max. process+thread count | 367 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/4/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865463652 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 37s | Docker mode activated. | ||| _ Prechecks _ | | +1 :green_heart: | dupname | 0m 0s | No case conflicting files found. | | +1 :green_heart: | hbaseanti | 0m 0s | Patch does not have any anti-patterns. | | +1 :green_heart: | @author | 0m 0s | The patch does not contain any @author tags. | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 45s | master passed | | +1 :green_heart: | compile | 0m 34s | master passed | | +1 :green_heart: | checkstyle | 0m 17s | master passed | | +1 :green_heart: | spotless | 0m 43s | branch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 34s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 39s | the patch passed | | +1 :green_heart: | compile | 0m 32s | the patch passed | | +1 :green_heart: | javac | 0m 32s | the patch passed | | +1 :green_heart: | checkstyle | 0m 15s | the patch passed | | +1 :green_heart: | whitespace | 0m 0s | The patch has no whitespace issues. | | +1 :green_heart: | hadoopcheck | 9m 29s | Patch does not cause any errors with Hadoop 3.2.4 3.3.6. | | +1 :green_heart: | spotless | 0m 42s | patch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 39s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | asflicense | 0m 10s | The patch does not generate ASF License warnings. | | | | 25m 38s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/artifact/yetus-general-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | dupname asflicense javac spotbugs hadoopcheck hbaseanti spotless checkstyle compile | | uname | Linux c84cf5e572e5 5.4.0-163-generic #180-Ubuntu SMP Tue Sep 5 13:21:23 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Max. process+thread count | 79 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/console | | versions | git=2.34.1 maven=3.8.6 spotbugs=4.7.3 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865461008 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 33s | Docker mode activated. | | -0 :warning: | yetus | 0m 2s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 56s | master passed | | +1 :green_heart: | compile | 0m 19s | master passed | | +1 :green_heart: | shadedjars | 4m 59s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 17s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 49s | the patch passed | | +1 :green_heart: | compile | 0m 18s | the patch passed | | +1 :green_heart: | javac | 0m 18s | the patch passed | | +1 :green_heart: | shadedjars | 4m 53s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 16s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 2m 19s | hbase-common in the patch passed. | | | | 20m 41s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/artifact/yetus-jdk11-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux 319b1393414b 5.4.0-166-generic #183-Ubuntu SMP Mon Oct 2 11:28:33 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/testReport/ | | Max. process+thread count | 373 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865459813 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 24s | Docker mode activated. | | -0 :warning: | yetus | 0m 3s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 14s | master passed | | +1 :green_heart: | compile | 0m 14s | master passed | | +1 :green_heart: | shadedjars | 5m 2s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 11s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 16s | the patch passed | | +1 :green_heart: | compile | 0m 14s | the patch passed | | +1 :green_heart: | javac | 0m 14s | the patch passed | | +1 :green_heart: | shadedjars | 5m 3s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 12s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 1m 48s | hbase-common in the patch passed. | | | | 18m 37s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/artifact/yetus-jdk8-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux f5a22b5aa79b 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Temurin-1.8.0_352-b08 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/testReport/ | | Max. process+thread count | 369 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/3/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1865448798 Based on the most recent benchmarking results ([see JIRA comment](https://issues.apache.org/jira/browse/HBASE-28256?focusedCommentId=17799200=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17799200)), I went and converted the approach here to use an approach adapted from [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186). This approach has microbenchmarks that outperform our current setup. Additionally, there's no regression in the smaller vLong range like there is with the first approach that I used in this patch and performance is similar to or slightly worse in the higher vLong range than the approach I originally presented. However, after I implement [HBASE-27730](https://issues.apache.org/jira/browse/HBASE-27730), this newer approach drastically outperforms my original approach. Additionally, since there is no performance regression I think that it's safe to just replace the body of the existing readVLong method with this new implementation. Thus, this patch can be back-ported to branch-3 & branch-2. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433255643 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: I also think after quick pass of the benchmarking numbers that the approach taken in https://issues.apache.org/jira/browse/HBASE-14186 is likely also the one we should adopt here -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433252640 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: Just posted the raw benchmarking results [in the JIRA in this comment](https://issues.apache.org/jira/browse/HBASE-28256?focusedCommentId=17799200=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17799200). I haven't had the chance to go fully over it yet, but my preliminary thought is that the optimization here and the recycler fix are worth it. It seems like they each yield like a 50% performance improvement over our current setup -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433252912 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: I can tackle https://issues.apache.org/jira/browse/HBASE-27730 after this issue -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433242660 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: I havent had time to implement the idea in HBASE-27730, but it should be a pretty easy change. Would you want to tackle it? Is the optimization here still worth it on top of the recycler fix? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433224938 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: The benchmarks are about halfway through running, so the results aren't totally settled yet. Just wanted to reiterate what a good find that was @bbeaudreault as the results when using ByteBuff's with the NONE recycler look to be substantially faster so far (in the 50% range). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433196386 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: That's a good callout. In lieu of that I'm going to update my microbenchmarks to also test on ByteBuff's with a NONE recycler (I'm intentionally not using the NONE recycler in my current microbenchmarks to simulate real region server operation). Here's the part of the flamegraph for `next()` that shows where the time is going for `readVLong` et al ![Screenshot 2023-12-20 at 4 17 19 PM](https://github.com/apache/hbase/assets/13725922/74566f11-9b42-48be-b99d-e15a561c43e1) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433154777 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: Oh, if the actual problem is the refcnt stuff, Duo and I had an idea for that a while ago: https://issues.apache.org/jira/browse/HBASE-27730. It was never implemented only because I was focused on the snapshot scan problem and we hadn't actually seen evidence of the problem elsewhere -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433097154 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: I'm not sure what approach is better between [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186) and this approach. This approach has the benefit of in the average case (where the remaining size of the buffer contains 8 bytes) doing less operations on the underlying ByteBuff after checking the size byte regardless of the size of the vLong (3 ops). The ByteBuff operations are typically slow in flamegraphs not because the actual methods are expensive to evaluate, but because `checkRefCount()` is typically quite expensive. I can do some microbenchmarks comparing the current approach, [HBASE-14186](https://issues.apache.org/jira/browse/HBASE-14186)-like approach, and the approach taken in this patch against each other for vLongs of all sizes. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433090992 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: Yeah it looks like mvcc, memstoreTs, and sequenceId are all pretty much used interchangeably from a quick read. I've dug up a bit of literature on the topic in [HBASE-13389](https://issues.apache.org/jira/browse/HBASE-13389). To summarize: MVCC values stick around for a while and the performance hit associated with that is known. They're not timestamps, but they're also not likely to be super small. This was a known issue and the performance impact was previously solved with [HBASE-14861](https://issues.apache.org/jira/browse/HBASE-14186). That patch affects only HFiles without a data block encoding applied and works by attempting to eagerly read 4 bytes, then 2 bytes, and then falling back to single byte reads as opposed to the approach taken in this PR where we attempt to eagerly read 8 bytes. It's hard to say which approach is better, but I'm thinking we should at least apply similar optimizations to data block encodings. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864978265 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 35s | Docker mode activated. | ||| _ Prechecks _ | | +1 :green_heart: | dupname | 0m 0s | No case conflicting files found. | | +1 :green_heart: | hbaseanti | 0m 0s | Patch does not have any anti-patterns. | | +1 :green_heart: | @author | 0m 0s | The patch does not contain any @author tags. | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 47s | master passed | | +1 :green_heart: | compile | 0m 35s | master passed | | +1 :green_heart: | checkstyle | 0m 17s | master passed | | +1 :green_heart: | spotless | 0m 44s | branch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 32s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 37s | the patch passed | | +1 :green_heart: | compile | 0m 33s | the patch passed | | +1 :green_heart: | javac | 0m 33s | the patch passed | | +1 :green_heart: | checkstyle | 0m 14s | the patch passed | | +1 :green_heart: | whitespace | 0m 0s | The patch has no whitespace issues. | | +1 :green_heart: | hadoopcheck | 9m 28s | Patch does not cause any errors with Hadoop 3.2.4 3.3.6. | | +1 :green_heart: | spotless | 0m 40s | patch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 40s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | asflicense | 0m 11s | The patch does not generate ASF License warnings. | | | | 25m 41s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/artifact/yetus-general-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | dupname asflicense javac spotbugs hadoopcheck hbaseanti spotless checkstyle compile | | uname | Linux 3be5ffe49a30 5.4.0-163-generic #180-Ubuntu SMP Tue Sep 5 13:21:23 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Max. process+thread count | 79 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/console | | versions | git=2.34.1 maven=3.8.6 spotbugs=4.7.3 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864972846 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 26s | Docker mode activated. | | -0 :warning: | yetus | 0m 3s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 3m 0s | master passed | | +1 :green_heart: | compile | 0m 15s | master passed | | +1 :green_heart: | shadedjars | 5m 15s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 16s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 39s | the patch passed | | +1 :green_heart: | compile | 0m 15s | the patch passed | | +1 :green_heart: | javac | 0m 15s | the patch passed | | +1 :green_heart: | shadedjars | 5m 11s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 14s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 2m 12s | hbase-common in the patch passed. | | | | 20m 45s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/artifact/yetus-jdk11-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux 21f2c3586ed3 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/testReport/ | | Max. process+thread count | 380 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433067760 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: It seems like mvcc version and memstoreTs are sort of used interchangeably for the same thing. So I guess the above indicates that this patch is not going to only affect "timestamp-like" values. That said I do think that in most production environments the mvcc is probably pretty large (as above) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864971776 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 26s | Docker mode activated. | | -0 :warning: | yetus | 0m 3s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 38s | master passed | | +1 :green_heart: | compile | 0m 14s | master passed | | +1 :green_heart: | shadedjars | 5m 17s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 14s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 22s | the patch passed | | +1 :green_heart: | compile | 0m 14s | the patch passed | | +1 :green_heart: | javac | 0m 14s | the patch passed | | +1 :green_heart: | shadedjars | 5m 13s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 13s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 1m 52s | hbase-common in the patch passed. | | | | 19m 46s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/artifact/yetus-jdk8-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux 57028c8ccd1c 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / 2c07847656 | | Default Java | Temurin-1.8.0_352-b08 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/testReport/ | | Max. process+thread count | 359 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/2/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1433063478 ## hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java: ## @@ -226,7 +226,7 @@ public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includ int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { - long mvccVersion = ByteBufferUtils.readVLong(bb); + long mvccVersion = ByteBufferUtils.readVLongTimestamp(bb); Review Comment: FWIW I don't think the mvcc version is a timestamp. Just taking a look at a random server that just opened a region (where it logs the next mvcc version), the value was `564069676` Maybe we keep readVLong and only use the new method for actual memstoreTs? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864921833 I've done the `readVLong` -> `readVLongTimestamp` refactor in https://github.com/apache/hbase/pull/5576/commits/9ad49c6623ce6f6d28609d72ac9010f3197e105c. For the reasons listed above, this PR is now only safe to apply to master and branch-3. After this PR is approved, I'll open up a backport PR for branch-2 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864802424 For branch-2 in that case, I'd probably just create a new readVLongTimestamp method and annotate the old readVLong method as Deprecated. If a use-case comes up down the line, we can un-deprecate it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864799163 > I think rename it to readVLongTimestamp is good. That sounds good—I'll go down that path. @Apache9 & @bbeaudreault I have a question before proceeding with making those changes: in branch 2, ByteBufferUtils is an IA.Public and in branch-3/master it's IA.Private (see [HBASE-22044](https://issues.apache.org/jira/browse/HBASE-22044). That means that we can rename the method without worry in branch-3/master, but to backport to branch-2 (which I see no reason not to do) we would probably need to either: keep the existing readVLong methods in branch-2 and make the readVLongTimestamp methods completely separate (or possibly just have readVLong delegate to the new readVLongTimestamp methods. Any preference on how I approach these changes for branch-2 compatibility? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache9 commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864505325 > It seems like some of the concern or confusion here is related to the generic name readVLong. If that method was used to read a _good use case_ for vlong, the performance at small byte sizes would matter. As it stands, it's only used in one _bad usecase_ the memstore ts. It's questionable whether a timestamp should be a vlong in the first place. > > Since this is only used for a timestamp use case, maybe we can move on from the small byte concerns. Would it make people feel better if we renamed the method to `readVLongTimestamp` with javadoc? @Apache9 > > (Edit: it may not be fair to call it a bad use case. What I mean is that if we care about saving bytes it feels like a timestamp could have a special encoding given the range of possible values, rather than vlong which is designed for all values) I think rename it to readVLongTimestamp is good. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
bbeaudreault commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1864420597 It seems like some of the concern or confusion here is related to the generic name readVLong. If that method was used to read a _good use case_ for vlong, the performance at small byte sizes would matter. As it stands, it's only used in one _bad usecase_ the memstore ts. It's questionable whether a timestamp should be a vlong in the first place. Since this is only used for a timestamp use case, maybe we can move on from the small byte concerns. Would it make people feel better if we renamed the method to `readVLongTimestamp` with javadoc? @Apache9 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1862076425 > But for 512(2 or 3 bytes?), the microbench result shows that there are some performance lost? It does look like there's a bit of a regression in the 512 case (3 bytes). I'm not as worried about that given that this codepath is only used in the HBase codebase to read memstore timestamps. I can add a comment to the method signature to warn to use `WritableUtils#readVLong` in the case that smaller vLongs are being read. The common use case for this codepath is going to be the 6 byte long/7 byte vLong decoding path which we're seeing the 40% performance increase on. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache9 commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1858750945 Looking at the code again, for one byte vlong, we still only read one byte first so should not have big differences. But for 512(2 or 3 bytes?), the microbench result shows that there are some performance lost? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache9 commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1858750380 http://www.w3.org/TR/REC-html40;> Benchmark | Value | Score | Error | Opt Score | Opt Error | Score Diff -- | -- | -- | -- | -- | -- | -- ReadVLongBenchmark.readVLong_OffHeapBB | 9 | 4.643 | 2.917 | 4.258 | 0.012 | -8.29% ReadVLongBenchmark.readVLong_OffHeapBB | 512 | 8.063 | 0.187 | 8.621 | 0.339 | 6.92% ReadVLongBenchmark.readVLong_OffHeapBB | 2146483640 | 11.999 | 0.314 | 12.481 | 2.609 | 4.02% ReadVLongBenchmark.readVLong_OffHeapBB | 1700104028981 | 14.880 | 0.698 | 14.211 | 0.041 | -4.50% ReadVLongBenchmark.readVLong_OffHeapBB_Padded | 9 | 4.233 | 0.136 | 4.222 | 0.007 | -0.26% ReadVLongBenchmark.readVLong_OffHeapBB_Padded | 512 | 7.986 | 0.048 | 8.830 | 0.022 | 10.57% ReadVLongBenchmark.readVLong_OffHeapBB_Padded | 2146483640 | 12.014 | 0.012 | 8.998 | 1.280 | -25.10% ReadVLongBenchmark.readVLong_OffHeapBB_Padded | 1700104028981 | 14.655 | 2.216 | 8.850 | 0.047 | -39.61% ReadVLongBenchmark.readVLong_OnHeapBB | 9 | 4.639 | 0.012 | 4.751 | 0.732 | 2.41% ReadVLongBenchmark.readVLong_OnHeapBB | 512 | 9.771 | 0.357 | 10.575 | 0.024 | 8.23% ReadVLongBenchmark.readVLong_OnHeapBB | 2146483640 | 13.928 | 0.557 | 14.231 | 0.385 | 2.18% ReadVLongBenchmark.readVLong_OnHeapBB | 1700104028981 | 17.487 | 4.527 | 17.252 | 0.064 | -1.34% ReadVLongBenchmark.readVLong_OnHeapBB_Padded | 9 | 5.245 | 0.019 | 4.680 | 0.108 | -10.77% ReadVLongBenchmark.readVLong_OnHeapBB_Padded | 512 | 10.086 | 0.317 | 9.719 | 1.401 | -3.64% ReadVLongBenchmark.readVLong_OnHeapBB_Padded | 2146483640 | 13.764 | 0.100 | 9.511 | 0.219 | -30.90% ReadVLongBenchmark.readVLong_OnHeapBB_Padded | 1700104028981 | 17.200 | 0.913 | 9.464 | 0.019 | -44.98% -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1858312138 > So the vint column in your table is the vlong value your read/write? Yes! The vint column in the table is the vlong value for the read/write (sorry for the confusing name). > Where 9 would be 1 byte, and 512 would be 2 bytes? Yeah 9 can be encoded with 1 byte, but 512 will require 3 bytes (1st byte is vLong size byte, since vLong can't fit completely inside a single byte, the first byte is just used to indicate size & sign and the next two bytes hold 512). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1428333485 ## hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java: ## @@ -468,38 +468,55 @@ public static void writeVLong(ByteBuffer out, long i) { } } - private interface ByteVisitor { -byte get(); - } - - private static long readVLong(ByteVisitor visitor) { -byte firstByte = visitor.get(); + /** Review Comment: When I was iterating on this, I did find that splitting out from the visitor increased performance a bit. I didn't save the intermediate JMH benchmark results/code (I can re-run the tests if you'd like, but I can certainly generate those numbers again. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache9 commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1857963730 So the vint column in your table is the vlong value your read/write? Where 9 would be 1 byte, and 512 would be 2 bytes? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache9 commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1428025477 ## hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java: ## @@ -468,38 +468,55 @@ public static void writeVLong(ByteBuffer out, long i) { } } - private interface ByteVisitor { -byte get(); - } - - private static long readVLong(ByteVisitor visitor) { -byte firstByte = visitor.get(); + /** Review Comment: I guess removing the visitor could also increase the performance a bit, maybe... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
wchevreuil commented on code in PR #5576: URL: https://github.com/apache/hbase/pull/5576#discussion_r1425691507 ## hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java: ## @@ -468,38 +468,55 @@ public static void writeVLong(ByteBuffer out, long i) { } } - private interface ByteVisitor { -byte get(); - } - - private static long readVLong(ByteVisitor visitor) { -byte firstByte = visitor.get(); + /** Review Comment: Nit: could we keep using the Visitor pattern here? It seems both methods only differ in two lines. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1854065824 > In general, if we choose to use VLong, it means usually the value will be small, for example, only 1 or 2 bytes, so I wonder whether your JMH covers the most common scenario? Benchmark does cover a variety of vLong sizes. The microbenchmarks above have breakdowns for the following vLongs: 9, 512, 2146483640, and 1700104028981 which can be encoded as 1 byte, 3 byte, 5 byte, and 7 bytes respectively (so a wide range). In practice, ByteBufferUtils.readVLong is only used in the HBase codebase to decode the memstoreTs for the CopyKey, Diff, Fast Diff, Prefix, and RIV1 DBE encodings so performance when decoding a 7 byte vLong (an actual epoch millis timestamp is 6 bytes, but there is a 1 byte overhead for the vLong encoding) is the most important gauge of how this will affect performance. I agree that generally when vLong is used, the caller believes that a fair amount of the data will be smaller than 7 or 8 bytes so the vLong encoding saves space. However, in practice the ByteBufferUtils.readVLong method isn't used in that way. > For random long value which is encoded as VLong, I think read 8 bytes at once will be faster, but what if the values are often only 1 or 2 bytes? From what the benchmarks show, there is absolutely no regression at the 1 byte case as the code is identical up to that point. When a 2 byte vLong is read, the benchmarks show a small performance penalty. It seems like the cost of reading an 8 byte word on a modern machine is about the same cost as reading a single byte word, so performance hit isn't super drastic (at least as benchmarked on my machine). Again, because of how this particular method is used to decode vLongs in the HBase codebase, we should be focused on the performance for the 1700104028981 vLong decoding case as it most closely represents the performance for decoding a timestamp. In any case, if we were to start using vLongs more widely, the performance for the 2 byte case for the optimized method isn't really that much worse than the current behavior. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache9 commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1854034262 In general, if we choose to use VLong, it means usually the value will be small, for example, only 1 or 2 bytes, so I wonder whether your JMH covers the most common scenario? For random long value which is encoded as VLong, I think read 8 bytes at once will be faster, but what if the values are often only 1 or 2 bytes? Thanks. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1852263236 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 44s | Docker mode activated. | ||| _ Prechecks _ | | +1 :green_heart: | dupname | 0m 0s | No case conflicting files found. | | +1 :green_heart: | hbaseanti | 0m 0s | Patch does not have any anti-patterns. | | +1 :green_heart: | @author | 0m 0s | The patch does not contain any @author tags. | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 3m 35s | master passed | | +1 :green_heart: | compile | 0m 38s | master passed | | +1 :green_heart: | checkstyle | 0m 18s | master passed | | +1 :green_heart: | spotless | 1m 2s | branch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 38s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 3m 28s | the patch passed | | +1 :green_heart: | compile | 0m 41s | the patch passed | | +1 :green_heart: | javac | 0m 41s | the patch passed | | +1 :green_heart: | checkstyle | 0m 16s | the patch passed | | +1 :green_heart: | whitespace | 0m 0s | The patch has no whitespace issues. | | +1 :green_heart: | hadoopcheck | 11m 0s | Patch does not cause any errors with Hadoop 3.2.4 3.3.6. | | +1 :green_heart: | spotless | 0m 42s | patch has no errors when running spotless:check. | | +1 :green_heart: | spotbugs | 0m 43s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | asflicense | 0m 12s | The patch does not generate ASF License warnings. | | | | 30m 30s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/artifact/yetus-general-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | dupname asflicense javac spotbugs hadoopcheck hbaseanti spotless checkstyle compile | | uname | Linux 8b6daa1912a9 5.4.0-163-generic #180-Ubuntu SMP Tue Sep 5 13:21:23 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / f406c6bbf0 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Max. process+thread count | 79 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/console | | versions | git=2.34.1 maven=3.8.6 spotbugs=4.7.3 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1852245556 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 32s | Docker mode activated. | | -0 :warning: | yetus | 0m 2s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 3m 11s | master passed | | +1 :green_heart: | compile | 0m 18s | master passed | | +1 :green_heart: | shadedjars | 5m 55s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 17s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 54s | the patch passed | | +1 :green_heart: | compile | 0m 18s | the patch passed | | +1 :green_heart: | javac | 0m 18s | the patch passed | | +1 :green_heart: | shadedjars | 6m 19s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 15s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 2m 15s | hbase-common in the patch passed. | | | | 23m 23s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/artifact/yetus-jdk8-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux 06f81e5a7ff2 5.4.0-1103-aws #111~18.04.1-Ubuntu SMP Tue May 23 20:04:10 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / f406c6bbf0 | | Default Java | Temurin-1.8.0_352-b08 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/testReport/ | | Max. process+thread count | 364 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
Re: [PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
Apache-HBase commented on PR #5576: URL: https://github.com/apache/hbase/pull/5576#issuecomment-1852239867 :confetti_ball: **+1 overall** | Vote | Subsystem | Runtime | Comment | |::|--:|:|:| | +0 :ok: | reexec | 0m 10s | Docker mode activated. | | -0 :warning: | yetus | 0m 3s | Unprocessed flag(s): --brief-report-file --spotbugs-strict-precheck --whitespace-eol-ignore-list --whitespace-tabs-ignore-list --quick-hadoopcheck | ||| _ Prechecks _ | ||| _ master Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 53s | master passed | | +1 :green_heart: | compile | 0m 17s | master passed | | +1 :green_heart: | shadedjars | 4m 55s | branch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 18s | master passed | ||| _ Patch Compile Tests _ | | +1 :green_heart: | mvninstall | 2m 42s | the patch passed | | +1 :green_heart: | compile | 0m 18s | the patch passed | | +1 :green_heart: | javac | 0m 18s | the patch passed | | +1 :green_heart: | shadedjars | 4m 47s | patch has no errors when building our shaded downstream artifacts. | | +1 :green_heart: | javadoc | 0m 16s | the patch passed | ||| _ Other Tests _ | | +1 :green_heart: | unit | 2m 17s | hbase-common in the patch passed. | | | | 19m 58s | | | Subsystem | Report/Notes | |--:|:-| | Docker | ClientAPI=1.43 ServerAPI=1.43 base: https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/artifact/yetus-jdk11-hadoop3-check/output/Dockerfile | | GITHUB PR | https://github.com/apache/hbase/pull/5576 | | Optional Tests | javac javadoc unit shadedjars compile | | uname | Linux bc7e61485c84 5.4.0-166-generic #183-Ubuntu SMP Mon Oct 2 11:28:33 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | dev-support/hbase-personality.sh | | git revision | master / f406c6bbf0 | | Default Java | Eclipse Adoptium-11.0.17+8 | | Test Results | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/testReport/ | | Max. process+thread count | 386 (vs. ulimit of 3) | | modules | C: hbase-common U: hbase-common | | Console output | https://ci-hbase.apache.org/job/HBase-PreCommit-GitHub-PR/job/PR-5576/1/console | | versions | git=2.34.1 maven=3.8.6 | | Powered by | Apache Yetus 0.12.0 https://yetus.apache.org | This message was automatically generated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[PR] HBASE-28256 Enhance ByteBufferUtils.readVLong to read 8 bytes at a time [hbase]
jbewing opened a new pull request, #5576: URL: https://github.com/apache/hbase/pull/5576 ### What This PR updates `ByteBufferUtils.readVLong` to attempt to read 8 bytes from the underlying buffer (if able too) when reading vLongs instead of a single byte at a time to increase performance. ### Implementation Notes Previously, these methods relied on a `ByteVisitor` interface which abstracted the `byte get()` method between a Java ByteBuffer and an HBase ByteBuff. To make these updates, I needed access to a larger variety of methods from both ByteBuf and ByteBuffer. Instead of continuing to use the the `ByteVisitor` abstraction, I split the implementations. They share a bit of common code but have distinct differences now. I believe splitting the call site from `ByteVisitor` to a separate one for `ByteBuffer` and `ByteBuff` enables the JIT compiler to generate slightly better code as the call sites are now each bimorphic. ### Benchmarking I wrote a JMH benchmarking suite (I'll attach the code to the JIRA) that measures the performance of reading various vLongs from both on & off heap buffers with and without padding (padding meaning that the buffer is at least 9 bytes long so that the vectorized read path can be applied). That is to say, for the unoptimized readVLong path, padding shouldn't incur any performance penalty, but with the optimized method only the padded benchmarks should show a substantial performance improvement (and they do show a pretty nice performance improvement). ``` Benchmark (vint) Mode Cnt Score Error Units ReadVLongBenchmark.readVLong_OffHeapBB 9 avgt 5 4.643 ± 2.917 ns/op ReadVLongBenchmark.readVLong_OffHeapBB 512 avgt 5 8.063 ± 0.187 ns/op ReadVLongBenchmark.readVLong_OffHeapBB 2146483640 avgt 5 11.999 ± 0.314 ns/op ReadVLongBenchmark.readVLong_OffHeapBB 1700104028981 avgt 5 14.880 ± 0.698 ns/op ReadVLongBenchmark.readVLong_OffHeapBB_Padded9 avgt 5 4.233 ± 0.136 ns/op ReadVLongBenchmark.readVLong_OffHeapBB_Padded 512 avgt 5 7.986 ± 0.048 ns/op ReadVLongBenchmark.readVLong_OffHeapBB_Padded 2146483640 avgt 5 12.014 ± 0.012 ns/op ReadVLongBenchmark.readVLong_OffHeapBB_Padded1700104028981 avgt 5 14.655 ± 2.216 ns/op ReadVLongBenchmark.readVLong_OnHeapBB9 avgt 5 4.639 ± 0.012 ns/op ReadVLongBenchmark.readVLong_OnHeapBB 512 avgt 5 9.771 ± 0.357 ns/op ReadVLongBenchmark.readVLong_OnHeapBB 2146483640 avgt 5 13.928 ± 0.557 ns/op ReadVLongBenchmark.readVLong_OnHeapBB1700104028981 avgt 5 17.487 ± 4.527 ns/op ReadVLongBenchmark.readVLong_OnHeapBB_Padded 9 avgt 5 5.245 ± 0.019 ns/op ReadVLongBenchmark.readVLong_OnHeapBB_Padded 512 avgt 5 10.086 ± 0.317 ns/op ReadVLongBenchmark.readVLong_OnHeapBB_Padded2146483640 avgt 5 13.764 ± 0.100 ns/op ReadVLongBenchmark.readVLong_OnHeapBB_Padded 1700104028981 avgt 5 17.200 ± 0.913 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB 9 avgt 5 4.258 ± 0.012 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB 512 avgt 5 8.621 ± 0.339 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB2146483640 avgt 5 12.481 ± 2.609 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB 1700104028981 avgt 5 14.211 ± 0.041 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB_Padded 9 avgt 5 4.222 ± 0.007 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB_Padded512 avgt 5 8.830 ± 0.022 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB_Padded 2146483640 avgt 5 8.998 ± 1.280 ns/op ReadVLongBenchmark.readVLong_Optimized_OffHeapBB_Padded 1700104028981 avgt 5 8.850 ± 0.047 ns/op ReadVLongBenchmark.readVLong_Optimized_OnHeapBB 9 avgt 5 4.751 ± 0.732 ns/op ReadVLongBenchmark.readVLong_Optimized_OnHeapBB512 avgt 5 10.575 ± 0.024 ns/op ReadVLongBenchmark.readVLong_Optimized_OnHeapBB 2146483640 avgt 5 14.231 ± 0.385 ns/op ReadVLongBenchmark.readVLong_Optimized_OnHeapBB 1700104028981 avgt 5 17.252 ± 0.064 ns/op ReadVLongBenchmark.readVLong_Optimized_OnHeapBB_Padded 9 avgt 5 4.680 ± 0.108 ns/op ReadVLongBenchmark.readVLong_Optimized_OnHeapBB_Padded 512 avgt 5 9.719 ± 1.401 ns/op