LouisLou2 opened a new pull request, #2072: URL: https://github.com/apache/fury/pull/2072
<!-- **Thanks for contributing to Fury.** **If this is your first time opening a PR on fury, you can refer to [CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).** Contribution Checklist - The **Apache Fury (incubating)** community has restrictions on the naming of pr titles. You can also find instructions in [CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md). - Fury has a strong focus on performance. If the PR you submit will have an impact on performance, please benchmark it first and provide the benchmark result here. --> ## What does this PR do? In this update, the bit manipulation logic within the MetaStringEncoder::encodeGeneric method has been optimized for better performance. The original implementation processed the bits one at a time in each loop iteration, updating only a single bit per cycle. The new approach improves efficiency by processing multiple bits (up to one byte) per loop iteration, reducing the number of iterations required and minimizing the overhead of bit-level operations. ## Related issues <!-- Is there any related issue? Please attach here. - #xxxx0 - #xxxx1 - #xxxx2 --> ## Does this PR introduce any user-facing change? <!-- If any user-facing interface changes, please [open an issue](https://github.com/apache/fury/issues/new/choose) describing the need to do so and update the document if necessary. --> - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark | Test # | encodeGeneric (ms) | new encodeGeneric (ms) | |--------|--------------------|---------------------| | 1 | 102 | 27 | | 2 | 85 | 29 | | 3 | 86 | 29 | | 4 | 81 | 26 | | 5 | 94 | 33 | | **Average** | **89.6** | **28.8** | here is the simple testing code: ```java package org.example; public class Main { private static int charToValueLowerSpecial(char c) { return 127; } private static int charToValueLowerUpperDigitSpecial(char c) { return 127; } static private byte[] encodeGeneric2(char[] chars, int bitsPerChar) { int totalBits = chars.length * bitsPerChar + 1; int byteLength = (totalBits + 7) / 8; byte[] bytes = new byte[byteLength]; int byteInd = 0; int bitInd = 1; // Start from the second bit (the first is reserved for the flag) int charInd = 0; int charBitRemain = bitsPerChar; // Remaining bits to process for the current character int mask; do { int charVal = (bitsPerChar == 5) ? charToValueLowerSpecial(chars[charInd]) : charToValueLowerUpperDigitSpecial(chars[charInd]); // Calculate how many bits are remaining in the current byte int nowByteRemain = 8 - bitInd; if (nowByteRemain >= charBitRemain) { // If the remaining bits in the current byte can fit the whole character value mask = (1 << charBitRemain) - 1; // Create a mask for the bits of the character bytes[byteInd] |= (byte) ((charVal & mask) << (nowByteRemain - charBitRemain)); // Place the character bits into the byte bitInd += charBitRemain; if (bitInd == 8) { // Move to the next byte if the current byte is filled ++byteInd; bitInd = 0; } // Character has been fully placed in the current byte, move to the next character ++charInd; charBitRemain = bitsPerChar; // Reset the remaining bits for the next character } else { // If the remaining bits in the current byte are not enough to hold the whole character mask = (1 << nowByteRemain) - 1; // Create a mask for the current available bits in the byte bytes[byteInd] |= (byte) ((charVal >> (charBitRemain - nowByteRemain)) & mask); // Place part of the character bits into the byte ++byteInd; // Move to the next byte bitInd = 0; // Reset bit index for the new byte charBitRemain -= nowByteRemain; // Decrease the remaining bits for the character } } while (charInd < chars.length); boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar; if (stripLastChar) { // Mark the first byte as indicating a stripped character bytes[0] = (byte) (bytes[0] | 0x80); } return bytes; } static private byte[] encodeGeneric(char[] chars, int bitsPerChar) { int totalBits = chars.length * bitsPerChar + 1; int byteLength = (totalBits + 7) / 8; // Calculate number of needed bytes byte[] bytes = new byte[byteLength]; int currentBit = 1; for (char c : chars) { int value = (bitsPerChar == 5) ? charToValueLowerSpecial(c) : charToValueLowerUpperDigitSpecial(c); // Encode the value in bitsPerChar bits for (int i = bitsPerChar - 1; i >= 0; i--) { if ((value & (1 << i)) != 0) { // Set the bit in the byte array int bytePos = currentBit / 8; int bitPos = currentBit % 8; bytes[bytePos] |= (byte) (1 << (7 - bitPos)); } currentBit++; } } boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar; if (stripLastChar) { bytes[0] = (byte) (bytes[0] | 0x80); } return bytes; } static private boolean bytesEqual(byte[] bytes1, byte[] bytes2) { if (bytes1.length != bytes2.length) { return false; } for (int i = 0; i < bytes1.length; ++i) { if (bytes1[i] != bytes2[i]) { return false; } } return true; } public static void main(String[] args) { test(); } // test performance comparison encodeGeneric2 vs encodeGeneric public static void test() { char[] chars = new char[10000000]; for (int i = 0; i < chars.length; i++) { // random chars[i] = (char) (Math.random() * 127); } long start = System.currentTimeMillis(); byte[] bytes = encodeGeneric(chars, 5); System.out.println("encodeGeneric: " + (System.currentTimeMillis() - start) + "ms"); start = System.currentTimeMillis(); byte[] bytes2 = encodeGeneric2(chars, 5); System.out.println("encodeGeneric2: " + (System.currentTimeMillis() - start) + "ms"); assert bytes.length == bytes2.length && bytesEqual(bytes, bytes2); } } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
