This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new c531d84b perf(java): Optimize Computational Efficiency of
MetaStringEncoder::encodeGeneric (#2072)
c531d84b is described below
commit c531d84b5b6303c0b602b56ade7cc947a94d0ea2
Author: 娄少昆 <[email protected]>
AuthorDate: Wed Feb 19 19:37:59 2025 +0800
perf(java): Optimize Computational Efficiency of
MetaStringEncoder::encodeGeneric (#2072)
<!--
**Thanks for contributing to Fury.**
**If this is your first time opening a PR on fury, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fury (incubating)** community has restrictions on the
naming of pr titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
- Fury has a strong focus on performance. If the PR you submit will have
an impact on performance, please benchmark it first and provide the
benchmark result here.
-->
## What does this PR do?
In this update, the bit manipulation logic within the
MetaStringEncoder::encodeGeneric method has been optimized for better
performance. The original implementation processed the bits one at a
time in each loop iteration, updating only a single bit per cycle. The
new approach improves efficiency by processing multiple bits (up to one
byte) per loop iteration, reducing the number of iterations required and
minimizing the overhead of bit-level operations.
## Related issues
<!--
Is there any related issue? Please attach here.
- #xxxx0
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
| Test # | encodeGeneric (ms) | new encodeGeneric (ms) |
|--------|--------------------|---------------------|
| 1 | 102 | 27 |
| 2 | 85 | 29 |
| 3 | 86 | 29 |
| 4 | 81 | 26 |
| 5 | 94 | 33 |
| **Average** | **89.6** | **28.8** |
here is the simple testing code:
```java
package org.example;
public class Main {
private static int charToValueLowerSpecial(char c) {
return 127;
}
private static int charToValueLowerUpperDigitSpecial(char c) {
return 127;
}
static private byte[] encodeGeneric2(char[] chars, int bitsPerChar) {
int totalBits = chars.length * bitsPerChar + 1;
int byteLength = (totalBits + 7) / 8;
byte[] bytes = new byte[byteLength];
int byteInd = 0;
int bitInd = 1; // Start from the second bit (the first is reserved
for the flag)
int charInd = 0;
int charBitRemain = bitsPerChar; // Remaining bits to process for the
current character
int mask;
while (charInd < chars.length) {
int charVal = (bitsPerChar == 5) ?
charToValueLowerSpecial(chars[charInd])
: charToValueLowerUpperDigitSpecial(chars[charInd]);
// Calculate how many bits are remaining in the current byte
int nowByteRemain = 8 - bitInd;
if (nowByteRemain >= charBitRemain) {
// If the remaining bits in the current byte can fit the whole
character value
mask = (1 << charBitRemain) - 1; // Create a mask for the bits of
the character
bytes[byteInd] |= (byte) ((charVal & mask) << (nowByteRemain -
charBitRemain)); // Place the character bits into the byte
bitInd += charBitRemain;
if (bitInd == 8) {
// Move to the next byte if the current byte is filled
++byteInd;
bitInd = 0;
}
// Character has been fully placed in the current byte, move to the
next character
++charInd;
charBitRemain = bitsPerChar; // Reset the remaining bits for the
next character
} else {
// If the remaining bits in the current byte are not enough to hold
the whole character
mask = (1 << nowByteRemain) - 1; // Create a mask for the current
available bits in the byte
bytes[byteInd] |= (byte) ((charVal >> (charBitRemain -
nowByteRemain)) & mask); // Place part of the character bits into the byte
++byteInd; // Move to the next byte
bitInd = 0; // Reset bit index for the new byte
charBitRemain -= nowByteRemain; // Decrease the remaining bits for
the character
}
}
boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar;
if (stripLastChar) {
// Mark the first byte as indicating a stripped character
bytes[0] = (byte) (bytes[0] | 0x80);
}
return bytes;
}
static private byte[] encodeGeneric(char[] chars, int bitsPerChar) {
int totalBits = chars.length * bitsPerChar + 1;
int byteLength = (totalBits + 7) / 8; // Calculate number of needed
bytes
byte[] bytes = new byte[byteLength];
int currentBit = 1;
for (char c : chars) {
int value =
(bitsPerChar == 5) ? charToValueLowerSpecial(c) :
charToValueLowerUpperDigitSpecial(c);
// Encode the value in bitsPerChar bits
for (int i = bitsPerChar - 1; i >= 0; i--) {
if ((value & (1 << i)) != 0) {
// Set the bit in the byte array
int bytePos = currentBit / 8;
int bitPos = currentBit % 8;
bytes[bytePos] |= (byte) (1 << (7 - bitPos));
}
currentBit++;
}
}
boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar;
if (stripLastChar) {
bytes[0] = (byte) (bytes[0] | 0x80);
}
return bytes;
}
static private boolean bytesEqual(byte[] bytes1, byte[] bytes2) {
if (bytes1.length != bytes2.length) {
return false;
}
for (int i = 0; i < bytes1.length; ++i) {
if (bytes1[i] != bytes2[i]) {
return false;
}
}
return true;
}
public static void main(String[] args) {
test();
}
// test performance comparison encodeGeneric2 vs encodeGeneric
public static void test() {
char[] chars = new char[10000000];
for (int i = 0; i < chars.length; i++) {
// random
chars[i] = (char) (Math.random() * 127);
}
long start = System.currentTimeMillis();
byte[] bytes = encodeGeneric(chars, 5);
System.out.println("encodeGeneric: " + (System.currentTimeMillis() -
start) + "ms");
start = System.currentTimeMillis();
byte[] bytes2 = encodeGeneric2(chars, 5);
System.out.println("encodeGeneric2: " + (System.currentTimeMillis() -
start) + "ms");
assert bytes.length == bytes2.length && bytesEqual(bytes, bytes2);
}
}
```
---
.../org/apache/fury/meta/MetaStringEncoder.java | 51 ++++++++++++++++------
1 file changed, 38 insertions(+), 13 deletions(-)
diff --git
a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
index 90298e8e..396721f6 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
@@ -246,25 +246,50 @@ public class MetaStringEncoder {
private byte[] encodeGeneric(char[] chars, int bitsPerChar) {
int totalBits = chars.length * bitsPerChar + 1;
- int byteLength = (totalBits + 7) / 8; // Calculate number of needed bytes
+ int byteLength = (totalBits + 7) / 8;
byte[] bytes = new byte[byteLength];
- int currentBit = 1;
- for (char c : chars) {
- int value =
- (bitsPerChar == 5) ? charToValueLowerSpecial(c) :
charToValueLowerUpperDigitSpecial(c);
- // Encode the value in bitsPerChar bits
- for (int i = bitsPerChar - 1; i >= 0; i--) {
- if ((value & (1 << i)) != 0) {
- // Set the bit in the byte array
- int bytePos = currentBit / 8;
- int bitPos = currentBit % 8;
- bytes[bytePos] |= (byte) (1 << (7 - bitPos));
+ int byteInd = 0;
+ int bitInd = 1; // Start from the second bit (the first is reserved for
the flag)
+ int charInd = 0;
+ int charBitRemain = bitsPerChar; // Remaining bits to process for the
current character
+ int mask;
+ while (charInd < chars.length) {
+ int charVal =
+ (bitsPerChar == 5)
+ ? charToValueLowerSpecial(chars[charInd])
+ : charToValueLowerUpperDigitSpecial(chars[charInd]);
+ // Calculate how many bits are remaining in the current byte
+ int nowByteRemain = 8 - bitInd;
+ if (nowByteRemain >= charBitRemain) {
+ // If the remaining bits in the current byte can fit the whole
character value
+ mask = (1 << charBitRemain) - 1; // Create a mask for the bits of the
character
+ bytes[byteInd] |=
+ (byte)
+ ((charVal & mask)
+ << (nowByteRemain - charBitRemain)); // Place the
character bits into the byte
+ bitInd += charBitRemain;
+ if (bitInd == 8) {
+ ++byteInd;
+ bitInd = 0;
}
- currentBit++;
+ ++charInd;
+ charBitRemain = bitsPerChar; // Reset the remaining bits for the next
character
+ } else {
+ // If the remaining bits in the current byte are not enough to hold
the whole character
+ mask = (1 << nowByteRemain) - 1; // Create a mask for the current
available bits in the byte
+ bytes[byteInd] |=
+ (byte)
+ ((charVal >> (charBitRemain - nowByteRemain))
+ & mask); // Place part of the character bits into the byte
+ ++byteInd; // Move to the next byte
+ bitInd = 0; // Reset bit index for the new byte
+ charBitRemain -= nowByteRemain; // Decrease the remaining bits for the
character
}
}
+
boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar;
if (stripLastChar) {
+ // Mark the first byte as indicating a stripped character
bytes[0] = (byte) (bytes[0] | 0x80);
}
return bytes;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]