This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new c531d84b perf(java): Optimize Computational Efficiency of 
MetaStringEncoder::encodeGeneric (#2072)
c531d84b is described below

commit c531d84b5b6303c0b602b56ade7cc947a94d0ea2
Author: 娄少昆 <[email protected]>
AuthorDate: Wed Feb 19 19:37:59 2025 +0800

    perf(java): Optimize Computational Efficiency of 
MetaStringEncoder::encodeGeneric (#2072)
    
    <!--
    **Thanks for contributing to Fury.**
    
    **If this is your first time opening a PR on fury, you can refer to
    
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fury (incubating)** community has restrictions on the
    naming of pr titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
    
    - Fury has a strong focus on performance. If the PR you submit will have
    an impact on performance, please benchmark it first and provide the
    benchmark result here.
    -->
    
    ## What does this PR do?
    
    In this update, the bit manipulation logic within the
    MetaStringEncoder::encodeGeneric method has been optimized for better
    performance. The original implementation processed the bits one at a
    time in each loop iteration, updating only a single bit per cycle. The
    new approach improves efficiency by processing multiple bits (up to one
    byte) per loop iteration, reducing the number of iterations required and
    minimizing the overhead of bit-level operations.
    
    ## Related issues
    
    <!--
    Is there any related issue? Please attach here.
    
    - #xxxx0
    - #xxxx1
    - #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fury/issues/new/choose) describing the
    need to do so and update the document if necessary.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    | Test # | encodeGeneric (ms) | new encodeGeneric (ms) |
    |--------|--------------------|---------------------|
    | 1      | 102                | 27                  |
    | 2      | 85                 | 29                  |
    | 3      | 86                 | 29                  |
    | 4      | 81                 | 26                  |
    | 5      | 94                 | 33                  |
    | **Average** | **89.6**             | **28.8**             |
    
    
    here is the simple testing code:
    ```java
    package org.example;
    
    public class Main {
      private static int charToValueLowerSpecial(char c) {
        return 127;
      }
    
      private static int charToValueLowerUpperDigitSpecial(char c) {
        return 127;
      }
    
      static private byte[] encodeGeneric2(char[] chars, int bitsPerChar) {
        int totalBits = chars.length * bitsPerChar + 1;
        int byteLength = (totalBits + 7) / 8;
        byte[] bytes = new byte[byteLength];
        int byteInd = 0;
        int bitInd = 1;  // Start from the second bit (the first is reserved 
for the flag)
        int charInd = 0;
        int charBitRemain = bitsPerChar;  // Remaining bits to process for the 
current character
        int mask;
        while (charInd < chars.length) {
          int charVal = (bitsPerChar == 5) ? 
charToValueLowerSpecial(chars[charInd])
            : charToValueLowerUpperDigitSpecial(chars[charInd]);
          // Calculate how many bits are remaining in the current byte
          int nowByteRemain = 8 - bitInd;
          if (nowByteRemain >= charBitRemain) {
            // If the remaining bits in the current byte can fit the whole 
character value
            mask = (1 << charBitRemain) - 1;  // Create a mask for the bits of 
the character
            bytes[byteInd] |= (byte) ((charVal & mask) << (nowByteRemain - 
charBitRemain));  // Place the character bits into the byte
            bitInd += charBitRemain;
            if (bitInd == 8) {
              // Move to the next byte if the current byte is filled
              ++byteInd;
              bitInd = 0;
            }
            // Character has been fully placed in the current byte, move to the 
next character
            ++charInd;
            charBitRemain = bitsPerChar;  // Reset the remaining bits for the 
next character
          } else {
            // If the remaining bits in the current byte are not enough to hold 
the whole character
            mask = (1 << nowByteRemain) - 1;  // Create a mask for the current 
available bits in the byte
            bytes[byteInd] |= (byte) ((charVal >> (charBitRemain - 
nowByteRemain)) & mask);  // Place part of the character bits into the byte
            ++byteInd;  // Move to the next byte
            bitInd = 0;  // Reset bit index for the new byte
            charBitRemain -= nowByteRemain;  // Decrease the remaining bits for 
the character
          }
        }
    
        boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar;
        if (stripLastChar) {
          // Mark the first byte as indicating a stripped character
          bytes[0] = (byte) (bytes[0] | 0x80);
        }
        return bytes;
      }
    
      static private byte[] encodeGeneric(char[] chars, int bitsPerChar) {
        int totalBits = chars.length * bitsPerChar + 1;
        int byteLength = (totalBits + 7) / 8; // Calculate number of needed 
bytes
        byte[] bytes = new byte[byteLength];
        int currentBit = 1;
        for (char c : chars) {
          int value =
            (bitsPerChar == 5) ? charToValueLowerSpecial(c) : 
charToValueLowerUpperDigitSpecial(c);
          // Encode the value in bitsPerChar bits
          for (int i = bitsPerChar - 1; i >= 0; i--) {
            if ((value & (1 << i)) != 0) {
              // Set the bit in the byte array
              int bytePos = currentBit / 8;
              int bitPos = currentBit % 8;
              bytes[bytePos] |= (byte) (1 << (7 - bitPos));
            }
            currentBit++;
          }
        }
        boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar;
        if (stripLastChar) {
          bytes[0] = (byte) (bytes[0] | 0x80);
        }
        return bytes;
      }
    
      static private boolean bytesEqual(byte[] bytes1, byte[] bytes2) {
        if (bytes1.length != bytes2.length) {
          return false;
        }
        for (int i = 0; i < bytes1.length; ++i) {
          if (bytes1[i] != bytes2[i]) {
            return false;
          }
        }
        return true;
      }
    
      public static void main(String[] args) {
        test();
      }
    
      // test performance comparison encodeGeneric2 vs encodeGeneric
      public static void test() {
        char[] chars = new char[10000000];
        for (int i = 0; i < chars.length; i++) {
          // random
          chars[i] = (char) (Math.random() * 127);
        }
        long start = System.currentTimeMillis();
        byte[] bytes = encodeGeneric(chars, 5);
        System.out.println("encodeGeneric: " + (System.currentTimeMillis() - 
start) + "ms");
        start = System.currentTimeMillis();
        byte[] bytes2 = encodeGeneric2(chars, 5);
        System.out.println("encodeGeneric2: " + (System.currentTimeMillis() - 
start) + "ms");
        assert bytes.length == bytes2.length && bytesEqual(bytes, bytes2);
      }
    }
    ```
---
 .../org/apache/fury/meta/MetaStringEncoder.java    | 51 ++++++++++++++++------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git 
a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java 
b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
index 90298e8e..396721f6 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
@@ -246,25 +246,50 @@ public class MetaStringEncoder {
 
   private byte[] encodeGeneric(char[] chars, int bitsPerChar) {
     int totalBits = chars.length * bitsPerChar + 1;
-    int byteLength = (totalBits + 7) / 8; // Calculate number of needed bytes
+    int byteLength = (totalBits + 7) / 8;
     byte[] bytes = new byte[byteLength];
-    int currentBit = 1;
-    for (char c : chars) {
-      int value =
-          (bitsPerChar == 5) ? charToValueLowerSpecial(c) : 
charToValueLowerUpperDigitSpecial(c);
-      // Encode the value in bitsPerChar bits
-      for (int i = bitsPerChar - 1; i >= 0; i--) {
-        if ((value & (1 << i)) != 0) {
-          // Set the bit in the byte array
-          int bytePos = currentBit / 8;
-          int bitPos = currentBit % 8;
-          bytes[bytePos] |= (byte) (1 << (7 - bitPos));
+    int byteInd = 0;
+    int bitInd = 1; // Start from the second bit (the first is reserved for 
the flag)
+    int charInd = 0;
+    int charBitRemain = bitsPerChar; // Remaining bits to process for the 
current character
+    int mask;
+    while (charInd < chars.length) {
+      int charVal =
+          (bitsPerChar == 5)
+              ? charToValueLowerSpecial(chars[charInd])
+              : charToValueLowerUpperDigitSpecial(chars[charInd]);
+      // Calculate how many bits are remaining in the current byte
+      int nowByteRemain = 8 - bitInd;
+      if (nowByteRemain >= charBitRemain) {
+        // If the remaining bits in the current byte can fit the whole 
character value
+        mask = (1 << charBitRemain) - 1; // Create a mask for the bits of the 
character
+        bytes[byteInd] |=
+            (byte)
+                ((charVal & mask)
+                    << (nowByteRemain - charBitRemain)); // Place the 
character bits into the byte
+        bitInd += charBitRemain;
+        if (bitInd == 8) {
+          ++byteInd;
+          bitInd = 0;
         }
-        currentBit++;
+        ++charInd;
+        charBitRemain = bitsPerChar; // Reset the remaining bits for the next 
character
+      } else {
+        // If the remaining bits in the current byte are not enough to hold 
the whole character
+        mask = (1 << nowByteRemain) - 1; // Create a mask for the current 
available bits in the byte
+        bytes[byteInd] |=
+            (byte)
+                ((charVal >> (charBitRemain - nowByteRemain))
+                    & mask); // Place part of the character bits into the byte
+        ++byteInd; // Move to the next byte
+        bitInd = 0; // Reset bit index for the new byte
+        charBitRemain -= nowByteRemain; // Decrease the remaining bits for the 
character
       }
     }
+
     boolean stripLastChar = bytes.length * 8 >= totalBits + bitsPerChar;
     if (stripLastChar) {
+      // Mark the first byte as indicating a stripped character
       bytes[0] = (byte) (bytes[0] | 0x80);
     }
     return bytes;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to