This is an automated email from the ASF dual-hosted git repository.

szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 7738b5bb03c HDDS-15355. Support StringCodec without fallback. (#10349)
7738b5bb03c is described below

commit 7738b5bb03c430ddf25f176e8fa842983f498671
Author: Tsz-Wo Nicholas Sze <[email protected]>
AuthorDate: Tue May 26 07:46:41 2026 -0700

    HDDS-15355. Support StringCodec without fallback. (#10349)
---
 .../apache/hadoop/hdds/utils/db/StringCodec.java   | 11 ++++--
 .../hadoop/hdds/utils/db/StringCodecBase.java      | 45 +++++++++++++++++-----
 .../hdds/utils/db/FixedLengthStringCodec.java      |  2 +-
 .../apache/hadoop/hdds/utils/db/RocksDatabase.java |  2 +-
 .../org/apache/hadoop/hdds/utils/db/TestCodec.java | 18 ++++++++-
 5 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
index b1a6120e72d..247070f4975 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
@@ -24,11 +24,16 @@
  * using {@link StandardCharsets#UTF_8},
  * a variable-length character encoding.
  */
-public final class StringCodec extends StringCodecBase {
-  private static final StringCodec CODEC = new StringCodec();
+public final class StringCodec extends StringCodecBase.WithFallback {
+  private static final StringCodec CODEC_WITH_FALLBACK = new StringCodec();
+  private static final Codec<String> CODEC_NO_FALLBACK = new StringCodecBase(StandardCharsets.UTF_8) { };
 
   public static StringCodec get() {
-    return CODEC;
+    return CODEC_WITH_FALLBACK;
+  }
+
+  public static Codec<String> getCodecNoFallback() {
+    return CODEC_NO_FALLBACK;
   }
 
   private StringCodec() {
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
index 62196a1bfff..88beda1f49d 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
@@ -112,20 +112,29 @@ private <E extends Exception> PutToByteBuffer<E> encode(
     };
   }
 
-  String decode(ByteBuffer buffer) {
+  String decodeNoFallback(ByteBuffer buffer) throws CodecException {
+    try {
+      return newDecoder().decode(buffer.asReadOnlyBuffer()).toString();
+    } catch (Exception e) {
+      throw new CodecException("Failed to decode " + buffer, e);
+    }
+  }
+
+  String decodeWithFallback(ByteBuffer buffer) {
     Runnable error = null;
     try {
       return newDecoder().decode(buffer.asReadOnlyBuffer()).toString();
     } catch (Exception e) {
-      error = () -> LOG.warn("Failed to decode buffer with " + charset
-          + ", buffer = (hex) " + StringUtils.bytes2Hex(buffer), e);
+      error = () -> LOG.warn("Failed to decode buffer with {}, buffer = (hex) {}",
+          charset, StringUtils.bytes2Hex(buffer, 20), e);
 
       // For compatibility, try decoding using StringUtils.
       final String decoded = StringUtils.bytes2String(buffer, charset);
       // Decoded successfully, update error message.
-      error = () -> LOG.warn("Decode (hex) " + StringUtils.bytes2Hex(buffer, 20)
-          + "\n  Attempt failed : " + charset + " (see exception below)"
-          + "\n  Retry succeeded: decoded to " + decoded, e);
+      error = () -> LOG.warn("Decode (hex) {}" +
+              "\n  Attempt failed : {} (see exception below)" +
+              "\n  Retry succeeded: decoded to {}",
+          StringUtils.bytes2Hex(buffer, 20), charset, decoded, e);
       return decoded;
     } finally {
       if (error != null) {
@@ -177,8 +186,8 @@ public CodecBuffer toCodecBuffer(@Nonnull String object, CodecBuffer.Allocator a
   }
 
   @Override
-  public String fromCodecBuffer(@Nonnull CodecBuffer buffer) {
-    return decode(buffer.asReadOnlyByteBuffer());
+  public String fromCodecBuffer(@Nonnull CodecBuffer buffer) throws CodecException {
+    return decodeNoFallback(buffer.asReadOnlyByteBuffer());
   }
 
   @Override
@@ -187,12 +196,28 @@ public byte[] toPersistedFormat(String object) throws CodecException {
   }
 
   @Override
-  public String fromPersistedFormat(byte[] bytes) {
-    return decode(ByteBuffer.wrap(bytes));
+  public String fromPersistedFormat(byte[] bytes) throws CodecException {
+    return decodeNoFallback(ByteBuffer.wrap(bytes));
   }
 
   @Override
   public String copyObject(String object) {
     return object;
   }
+
+  static class WithFallback extends StringCodecBase {
+    WithFallback(Charset charset) {
+      super(charset);
+    }
+
+    @Override
+    public String fromCodecBuffer(@Nonnull CodecBuffer buffer) {
+      return decodeWithFallback(buffer.asReadOnlyByteBuffer());
+    }
+
+    @Override
+    public String fromPersistedFormat(byte[] bytes) {
+      return decodeWithFallback(ByteBuffer.wrap(bytes));
+    }
+  }
 }
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
index 8c91b17bdaf..ae1155e6aa1 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
@@ -25,7 +25,7 @@
  * a fixed-length one-byte-per-character encoding,
  * i.e. the serialized size equals to {@link String#length()}.
  */
-public final class FixedLengthStringCodec extends StringCodecBase {
+public final class FixedLengthStringCodec extends StringCodecBase.WithFallback {
 
   private static final FixedLengthStringCodec INSTANCE
       = new FixedLengthStringCodec();
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
index f344ad95e55..fe500e27447 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java
@@ -105,7 +105,7 @@ static String bytes2String(byte[] bytes) {
   }
 
   static String bytes2String(ByteBuffer bytes) {
-    return StringCodec.get().decode(bytes);
+    return StringCodec.get().decodeWithFallback(bytes);
   }
 
   static RocksDatabaseException toRocksDatabaseException(Object name, String op, RocksDBException e) {
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
index 4ce46b97cf8..649c8a46f92 100644
--- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
@@ -32,6 +32,7 @@
 import com.google.common.primitives.Shorts;
 import com.google.protobuf.ByteString;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
 import java.util.function.Consumer;
@@ -141,6 +142,20 @@ static void runTestLongs(long original) {
     assertEquals(original, codec.fromPersistedFormat(bytes));
   }
 
+  @Test
+  public void testStringCodecMalformedUtf8String() throws Exception {
+    final byte[] malformed = new byte[] {(byte) 0xC3, (byte) '/', 0, 0, 0, 1};
+
+    // StringCodec.getCodecNoFallback() should throw CodecException
+    assertThrows(CodecException.class,
+        () -> StringCodec.getCodecNoFallback().fromPersistedFormat(malformed));
+
+    // StringCodec.get() will replace malformed characters.
+    final String decoded = StringCodec.get().fromPersistedFormat(malformed);
+    final byte[] encoded = StringCodec.get().toPersistedFormat(decoded);
+    assertFalse(Arrays.equals(malformed, encoded));
+  }
+
   @Test
   public void testStringCodec() throws Exception {
     assertFalse(StringCodec.get().isFixedLength());
@@ -183,6 +198,7 @@ public void testStringCodec() throws Exception {
   static int runTestStringCodec(String original) throws Exception {
     final int serializedSize = UTF_8.encode(original).remaining();
     runTest(StringCodec.get(), original, serializedSize);
+    runTest(StringCodec.getCodecNoFallback(), original, serializedSize);
     return serializedSize;
   }
 
@@ -204,7 +220,7 @@ public void testFixedLengthStringCodec() throws Exception {
 
 
     final String multiByteChars = "Ozone 是 Hadoop 的分布式对象存储系统,具有易扩展和冗余存储的特点。";
-    assertThrows(IOException.class,
+    assertThrows(CodecException.class,
         tryCatch(() -> runTestFixedLengthStringCodec(multiByteChars)));
     assertThrows(IllegalStateException.class,
         tryCatch(() -> FixedLengthStringCodec.string2Bytes(multiByteChars)));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to