This is an automated email from the ASF dual-hosted git repository. chaokunyang pushed a commit to branch releases-0.10 in repository https://gitbox.apache.org/repos/asf/fury.git
commit b1ed2a6ead2a77ae39eb551a8bafc570def86621 Author: LouShaokun <lsk...@163.com> AuthorDate: Fri May 9 18:12:49 2025 +0800 fix(java): Fix empty string processing in MetaStringBytes (#2212) ## What does this PR do? This PR fixes issue #2096 by improving the handling of empty strings in MetaStringBytes. The primary changes are: 1. Explicitly defining, via `Encoding.forEmptyStr()`, that empty strings always use the UTF-8 meta string encoding 2. Adding a dedicated constant for empty MetaStringBytes: `public static final MetaStringBytes EMPTY = MetaStringBytes.of(MetaString.EMPTY)` 3. Adding a zero-length check to `MetaStringResolver.readSmallMetaStringBytes` that returns `MetaStringBytes.EMPTY` right after the encoding byte is read, to prevent potential buffer reading issues when length is zero These changes ensure that empty strings are handled consistently throughout the codebase and prevent potential errors when processing empty strings during serialization and deserialization. ## Related issues - #2096 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? ## Benchmark ## Additional Notes Since this PR involves changes across multiple components related to string handling, I'd appreciate a thorough review to ensure there are no unintended side effects. If there's a better approach to solving the empty string issue, I'm open to suggestions. Also, please note that the current main branch has an issue (#2211) affecting CrossLanguageTest, which this PR will also encounter. It might be beneficial to address #2211 first or at least be aware of it when reviewing this PR. 
--- .../main/java/org/apache/fury/meta/MetaString.java | 7 ++++++ .../org/apache/fury/meta/MetaStringEncoder.java | 29 +++++++++------------- .../org/apache/fury/resolver/MetaStringBytes.java | 1 + .../apache/fury/resolver/MetaStringResolver.java | 12 +++++++-- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java b/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java index 3c2be21b..a85426f9 100644 --- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java +++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaString.java @@ -55,8 +55,15 @@ public class MetaString { } throw new IllegalArgumentException("Encoding flag not recognized: " + value); } + + public static Encoding forEmptyStr() { + return UTF_8; + } } + public static final MetaString EMPTY = + new MetaString("", Encoding.forEmptyStr(), '\0', '\0', new byte[0]); + private final String string; private final Encoding encoding; private final char specialChar1; diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java index 396721f6..c163f5a7 100644 --- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java +++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java @@ -20,8 +20,6 @@ package org.apache.fury.meta; import java.nio.charset.StandardCharsets; -import java.util.HashSet; -import org.apache.fury.collection.Collections; import org.apache.fury.meta.MetaString.Encoding; import org.apache.fury.util.Preconditions; import org.apache.fury.util.StringUtils; @@ -55,7 +53,7 @@ public class MetaStringEncoder { public MetaString encode(String input, Encoding[] encodings) { if (input.isEmpty()) { - return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2, new byte[0]); + return MetaString.EMPTY; } if (!StringUtils.isLatin(input.toCharArray())) { return 
new MetaString( @@ -83,7 +81,7 @@ public class MetaStringEncoder { throw new IllegalArgumentException("Non-ASCII characters in meta string are not allowed"); } if (input.isEmpty()) { - return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2, new byte[0]); + return MetaString.EMPTY; } byte[] bytes; switch (encoding) { @@ -107,42 +105,39 @@ public class MetaStringEncoder { } } - public Encoding computeEncoding(String input) { - return computeEncoding(input, Encoding.values()); - } - public Encoding computeEncoding(String input, Encoding[] encodings) { - HashSet<Encoding> encodingSet = Collections.ofHashSet(encodings); if (input.isEmpty()) { - if (encodingSet.contains(Encoding.LOWER_SPECIAL)) { - return Encoding.LOWER_SPECIAL; - } + return Encoding.forEmptyStr(); + } + boolean[] encodingFlags = new boolean[Encoding.values().length]; + for (Encoding encoding : encodings) { + encodingFlags[encoding.ordinal()] = true; } char[] chars = input.toCharArray(); StringStatistics statistics = computeStatistics(chars); if (statistics.canLowerSpecialEncoded) { - if (encodingSet.contains(Encoding.LOWER_SPECIAL)) { + if (encodingFlags[Encoding.LOWER_SPECIAL.ordinal()]) { return Encoding.LOWER_SPECIAL; } } if (statistics.canLowerUpperDigitSpecialEncoded) { if (statistics.digitCount != 0) { - if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) { + if (encodingFlags[Encoding.LOWER_UPPER_DIGIT_SPECIAL.ordinal()]) { return Encoding.LOWER_UPPER_DIGIT_SPECIAL; } } int upperCount = statistics.upperCount; if (upperCount == 1 && Character.isUpperCase(chars[0])) { - if (encodingSet.contains(Encoding.FIRST_TO_LOWER_SPECIAL)) { + if (encodingFlags[Encoding.FIRST_TO_LOWER_SPECIAL.ordinal()]) { return Encoding.FIRST_TO_LOWER_SPECIAL; } } if ((chars.length + upperCount) * 5 < (chars.length * 6)) { - if (encodingSet.contains(Encoding.ALL_TO_LOWER_SPECIAL)) { + if (encodingFlags[Encoding.ALL_TO_LOWER_SPECIAL.ordinal()]) { return Encoding.ALL_TO_LOWER_SPECIAL; } } - if 
(encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) { + if (encodingFlags[Encoding.LOWER_UPPER_DIGIT_SPECIAL.ordinal()]) { return Encoding.LOWER_UPPER_DIGIT_SPECIAL; } } diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java index 1612bcaf..00a73085 100644 --- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java +++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java @@ -30,6 +30,7 @@ import org.apache.fury.util.MurmurHash3; @Internal public final class MetaStringBytes { static final short DEFAULT_DYNAMIC_WRITE_STRING_ID = -1; + public static final MetaStringBytes EMPTY = MetaStringBytes.of(MetaString.EMPTY); private static final int HEADER_MASK = 0xff; final byte[] bytes; diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java index af24ce8e..9278ebe9 100644 --- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java +++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringResolver.java @@ -220,8 +220,12 @@ public final class MetaStringResolver { } private MetaStringBytes readSmallMetaStringBytes(MemoryBuffer buffer, int len) { - long v1, v2 = 0; byte encoding = buffer.readByte(); + if (len == 0) { + assert encoding == MetaString.Encoding.UTF_8.getValue(); + return MetaStringBytes.EMPTY; + } + long v1, v2 = 0; if (len <= 8) { v1 = buffer.readBytesAsInt64(len); } else { @@ -237,8 +241,12 @@ public final class MetaStringResolver { private MetaStringBytes readSmallMetaStringBytes( MemoryBuffer buffer, MetaStringBytes cache, int len) { - long v1, v2 = 0; byte encoding = buffer.readByte(); + if (len == 0) { + assert encoding == MetaString.Encoding.UTF_8.getValue(); + return MetaStringBytes.EMPTY; + } + long v1, v2 = 0; if (len <= 8) { v1 = 
buffer.readBytesAsInt64(len); } else { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@fury.apache.org For additional commands, e-mail: commits-h...@fury.apache.org