This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch releases-0.10
in repository https://gitbox.apache.org/repos/asf/fury.git

commit b1ed2a6ead2a77ae39eb551a8bafc570def86621
Author: LouShaokun <lsk...@163.com>
AuthorDate: Fri May 9 18:12:49 2025 +0800

    fix(java): Fix empty string processing in MetaStringBytes (#2212)
    
    ## What does this PR do?
    This PR fixes issue #2096 by improving the handling of empty strings in
    MetaStringBytes. The primary changes are:
    
    1. Explicitly defining that empty strings will use UTF-8 encoding for
    meta string encoding
    2. Adding a dedicated constant for empty MetaStringBytes: `public static
    final MetaStringBytes EMPTY = MetaStringBytes.of(MetaString.EMPTY)`
    3. Adding a length check to prevent potential buffer reading issues when
    length is zero
    
    These changes ensure that empty strings are handled consistently
    throughout the codebase and prevent potential errors when processing
    empty strings during serialization and deserialization.
    
    ## Related issues
    - #2096
    
    ## Does this PR introduce any user-facing change?
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    ## Additional Notes
    
    Since this PR involves changes across multiple components related to
    string handling, I'd appreciate a thorough review to ensure there are no
    unintended side effects. If there's a better approach to solving the
    empty string issue, I'm open to suggestions.
    
    Also, please note that the current main branch has an issue (#2211)
    affecting CrossLanguageTest, which this PR will also encounter. It might
    be beneficial to address #2211 first or at least be aware of it when
    reviewing this PR.
---
 .../main/java/org/apache/fury/meta/MetaString.java |  7 ++++++
 .../org/apache/fury/meta/MetaStringEncoder.java    | 29 +++++++++-------------
 .../org/apache/fury/resolver/MetaStringBytes.java  |  1 +
 .../apache/fury/resolver/MetaStringResolver.java   | 12 +++++++--
 4 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java b/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
index 3c2be21b..a85426f9 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java
@@ -55,8 +55,15 @@ public class MetaString {
       }
       throw new IllegalArgumentException("Encoding flag not recognized: " + value);
     }
+
+    public static Encoding forEmptyStr() {
+      return UTF_8;
+    }
   }
 
+  public static final MetaString EMPTY =
+      new MetaString("", Encoding.forEmptyStr(), '\0', '\0', new byte[0]);
+
   private final String string;
   private final Encoding encoding;
   private final char specialChar1;
diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
index 396721f6..c163f5a7 100644
--- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java
@@ -20,8 +20,6 @@
 package org.apache.fury.meta;
 
 import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import org.apache.fury.collection.Collections;
 import org.apache.fury.meta.MetaString.Encoding;
 import org.apache.fury.util.Preconditions;
 import org.apache.fury.util.StringUtils;
@@ -55,7 +53,7 @@ public class MetaStringEncoder {
 
   public MetaString encode(String input, Encoding[] encodings) {
     if (input.isEmpty()) {
-      return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2, new byte[0]);
+      return MetaString.EMPTY;
     }
     if (!StringUtils.isLatin(input.toCharArray())) {
       return new MetaString(
@@ -83,7 +81,7 @@ public class MetaStringEncoder {
       throw new IllegalArgumentException("Non-ASCII characters in meta string are not allowed");
     }
     if (input.isEmpty()) {
-      return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2, new byte[0]);
+      return MetaString.EMPTY;
     }
     byte[] bytes;
     switch (encoding) {
@@ -107,42 +105,39 @@ public class MetaStringEncoder {
     }
   }
 
-  public Encoding computeEncoding(String input) {
-    return computeEncoding(input, Encoding.values());
-  }
-
   public Encoding computeEncoding(String input, Encoding[] encodings) {
-    HashSet<Encoding> encodingSet = Collections.ofHashSet(encodings);
     if (input.isEmpty()) {
-      if (encodingSet.contains(Encoding.LOWER_SPECIAL)) {
-        return Encoding.LOWER_SPECIAL;
-      }
+      return Encoding.forEmptyStr();
+    }
+    boolean[] encodingFlags = new boolean[Encoding.values().length];
+    for (Encoding encoding : encodings) {
+      encodingFlags[encoding.ordinal()] = true;
     }
     char[] chars = input.toCharArray();
     StringStatistics statistics = computeStatistics(chars);
     if (statistics.canLowerSpecialEncoded) {
-      if (encodingSet.contains(Encoding.LOWER_SPECIAL)) {
+      if (encodingFlags[Encoding.LOWER_SPECIAL.ordinal()]) {
         return Encoding.LOWER_SPECIAL;
       }
     }
     if (statistics.canLowerUpperDigitSpecialEncoded) {
       if (statistics.digitCount != 0) {
-        if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) {
+        if (encodingFlags[Encoding.LOWER_UPPER_DIGIT_SPECIAL.ordinal()]) {
           return Encoding.LOWER_UPPER_DIGIT_SPECIAL;
         }
       }
       int upperCount = statistics.upperCount;
       if (upperCount == 1 && Character.isUpperCase(chars[0])) {
-        if (encodingSet.contains(Encoding.FIRST_TO_LOWER_SPECIAL)) {
+        if (encodingFlags[Encoding.FIRST_TO_LOWER_SPECIAL.ordinal()]) {
           return Encoding.FIRST_TO_LOWER_SPECIAL;
         }
       }
       if ((chars.length + upperCount) * 5 < (chars.length * 6)) {
-        if (encodingSet.contains(Encoding.ALL_TO_LOWER_SPECIAL)) {
+        if (encodingFlags[Encoding.ALL_TO_LOWER_SPECIAL.ordinal()]) {
           return Encoding.ALL_TO_LOWER_SPECIAL;
         }
       }
-      if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) {
+      if (encodingFlags[Encoding.LOWER_UPPER_DIGIT_SPECIAL.ordinal()]) {
         return Encoding.LOWER_UPPER_DIGIT_SPECIAL;
       }
     }
diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
index 1612bcaf..00a73085 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java
@@ -30,6 +30,7 @@ import org.apache.fury.util.MurmurHash3;
 @Internal
 public final class MetaStringBytes {
   static final short DEFAULT_DYNAMIC_WRITE_STRING_ID = -1;
+  public static final MetaStringBytes EMPTY = MetaStringBytes.of(MetaString.EMPTY);
   private static final int HEADER_MASK = 0xff;
 
   final byte[] bytes;
diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
index af24ce8e..9278ebe9 100644
--- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
+++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java
@@ -220,8 +220,12 @@ public final class MetaStringResolver {
   }
 
   private MetaStringBytes readSmallMetaStringBytes(MemoryBuffer buffer, int len) {
-    long v1, v2 = 0;
     byte encoding = buffer.readByte();
+    if (len == 0) {
+      assert encoding == MetaString.Encoding.UTF_8.getValue();
+      return MetaStringBytes.EMPTY;
+    }
+    long v1, v2 = 0;
     if (len <= 8) {
       v1 = buffer.readBytesAsInt64(len);
     } else {
@@ -237,8 +241,12 @@ public final class MetaStringResolver {
 
   private MetaStringBytes readSmallMetaStringBytes(
       MemoryBuffer buffer, MetaStringBytes cache, int len) {
-    long v1, v2 = 0;
     byte encoding = buffer.readByte();
+    if (len == 0) {
+      assert encoding == MetaString.Encoding.UTF_8.getValue();
+      return MetaStringBytes.EMPTY;
+    }
+    long v1, v2 = 0;
     if (len <= 8) {
       v1 = buffer.readBytesAsInt64(len);
     } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@fury.apache.org
For additional commands, e-mail: commits-h...@fury.apache.org

Reply via email to