This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 329308f18a HDDS-8724. FixedLengthStringCodec may silently replace 
unsupported characters. (#4801)
329308f18a is described below

commit 329308f18ab3e0fe76480f46489de0b39ca9a76a
Author: Tsz-Wo Nicholas Sze <[email protected]>
AuthorDate: Mon Jun 5 18:36:58 2023 +0800

    HDDS-8724. FixedLengthStringCodec may silently replace unsupported 
characters. (#4801)
---
 .../java/org/apache/hadoop/hdds/StringUtils.java   |  21 ++-
 .../org/apache/hadoop/hdds/utils/db/Codec.java     |   8 +
 .../apache/hadoop/hdds/utils/db/CodecBuffer.java   |  70 ++++----
 .../hadoop/hdds/utils/db/StringCodecBase.java      | 200 +++++++++++++++++++++
 .../keyvalue/helpers/KeyValueContainerUtil.java    |   5 +-
 .../metadata/DatanodeSchemaThreeDBDefinition.java  |  13 +-
 .../metadata/DatanodeStoreSchemaThreeImpl.java     |   6 +-
 .../container/metadata/SchemaOneKeyCodec.java      |   4 +-
 .../hdds/utils/db/FixedLengthStringCodec.java      |  46 +++--
 .../hdds/utils/db/FixedLengthStringUtils.java      |  57 ------
 .../apache/hadoop/hdds/utils/db/StringCodec.java   |  52 +-----
 .../org/apache/hadoop/hdds/utils/db/TestCodec.java |  48 +++++
 ...gUtils.java => TestFixedLengthStringCodec.java} |   8 +-
 .../org/apache/hadoop/ozone/om/TestLDBCli.java     |   4 +-
 .../org/apache/hadoop/ozone/om/KeyManagerImpl.java |   2 +-
 .../org/apache/hadoop/ozone/debug/DBScanner.java   |   4 +-
 16 files changed, 365 insertions(+), 183 deletions(-)

diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
index 4f95839f05..3cf7477230 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
@@ -63,7 +63,26 @@ public final class StringUtils {
   }
 
   public static String bytes2String(ByteBuffer bytes) {
-    return Unpooled.wrappedBuffer(bytes.asReadOnlyBuffer()).toString(UTF8);
+    return bytes2String(bytes, UTF8);
+  }
+
+  public static String bytes2String(ByteBuffer bytes, Charset charset) {
+    return Unpooled.wrappedBuffer(bytes.asReadOnlyBuffer()).toString(charset);
+  }
+
+  public static String bytes2Hex(ByteBuffer buffer, int max) {
+    buffer = buffer.asReadOnlyBuffer();
+    final int remaining = buffer.remaining();
+    final int n = Math.min(max, remaining);
+    final StringBuilder builder = new StringBuilder(3 * n);
+    for (int i = 0; i < n; i++) {
+      builder.append(String.format("%02X ", buffer.get()));
+    }
+    return builder + (remaining > max ? "..." : "");
+  }
+
+  public static String bytes2Hex(ByteBuffer buffer) {
+    return bytes2Hex(buffer, buffer.remaining());
   }
 
   /**
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
index 301665cb63..ad815e1e49 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
@@ -44,6 +44,14 @@ public interface Codec<T> {
     return false;
   }
 
+  /**
+   * @return an upper bound, which should be obtained without serialization,
+   *         of the serialized size of the given object.
+   */
+  default int getSerializedSizeUpperBound(T object) {
+    throw new UnsupportedOperationException();
+  }
+
   /**
    * Serialize the given object to bytes.
    *
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
index 93f2ed716d..c8550d59c7 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
@@ -34,6 +34,7 @@ import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.IntFunction;
 import java.util.function.ToIntFunction;
 
 /**
@@ -45,46 +46,43 @@ public final class CodecBuffer implements AutoCloseable {
 
   private static final ByteBufAllocator POOL
       = PooledByteBufAllocator.DEFAULT;
+  private static final IntFunction<ByteBuf> POOL_DIRECT = c -> c >= 0
+      ? POOL.directBuffer(c, c) // allocate exact size
+      : POOL.directBuffer(-c);  // allocate a resizable buffer
+  private static final IntFunction<ByteBuf> POOL_HEAP = c -> c >= 0
+      ? POOL.heapBuffer(c, c)   // allocate exact size
+      : POOL.heapBuffer(-c);    // allocate a resizable buffer
 
-  
   /**
-   * Allocate a direct buffer.
-   * When the given capacity is non-negative, allocate a buffer by setting
-   * the initial capacity and the maximum capacity to the given capacity.
-   * When the given capacity is negative, allocate a buffer
-   * by setting only the initial capacity to the absolute value of it
-   * and then the buffer's capacity can be increased if necessary.
+   * Allocate a buffer using the given allocator.
+   *
+   * @param allocator Take a capacity parameter and return an allocated buffer.
+   *                  When the capacity is non-negative,
+   *                  allocate a buffer by setting the initial capacity
+   *                  and the maximum capacity to the given capacity.
+   *                  When the capacity is negative,
+   *                  allocate a buffer by setting only the initial capacity
+   *                  to the absolute value of it and, as a result,
+   *                  the buffer's capacity can be increased if necessary.
+   */
+  static CodecBuffer allocate(int capacity, IntFunction<ByteBuf> allocator) {
+    return new CodecBuffer(allocator.apply(capacity));
+  }
+
+  /**
+   * Allocate a pooled direct buffer.
+   * @see #allocate(int, IntFunction)
    */
   public static CodecBuffer allocateDirect(int capacity) {
-    final ByteBuf buf;
-    if (capacity >= 0) {
-      // allocate exact size
-      buf = POOL.directBuffer(capacity, capacity);
-    } else {
-      // allocate a resizable buffer
-      buf = POOL.directBuffer(-capacity);
-    }
-    return new CodecBuffer(buf);
+    return allocate(capacity, POOL_DIRECT);
   }
 
   /**
-   * Allocate a heap buffer.
-   * When the given capacity is non-negative, allocate a buffer by setting
-   * the initial capacity and the maximum capacity to the given capacity.
-   * When the given capacity is negative, allocate a buffer
-   * by setting only the initial capacity to the absolute value of it
-   * and then the buffer's capacity can be increased if necessary.
+   * Allocate a pooled heap buffer.
+   * @see #allocate(int, IntFunction)
    */
   public static CodecBuffer allocateHeap(int capacity) {
-    final ByteBuf buf;
-    if (capacity >= 0) {
-      // allocate exact size
-      buf = POOL.heapBuffer(capacity, capacity);
-    } else {
-      // allocate a resizable buffer
-      buf = POOL.heapBuffer(-capacity);
-    }
-    return new CodecBuffer(buf);
+    return allocate(capacity, POOL_HEAP);
   }
 
   /** Wrap the given array. */
@@ -189,6 +187,16 @@ public final class CodecBuffer implements AutoCloseable {
     return buf.nioBuffer().asReadOnlyBuffer();
   }
 
+  /**
+   * @return a new array containing the readable bytes.
+   * @see #readableBytes()
+   */
+  public byte[] getArray() {
+    final byte[] array = new byte[readableBytes()];
+    buf.readBytes(array);
+    return array;
+  }
+
   /** @return an {@link InputStream} reading from this buffer. */
   public InputStream getInputStream() {
     return new ByteBufInputStream(buf.duplicate());
diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
new file mode 100644
index 0000000000..799b7a9381
--- /dev/null
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.hadoop.hdds.utils.db;
+
+import org.apache.hadoop.hdds.StringUtils;
+import org.apache.ratis.util.Preconditions;
+import org.apache.ratis.util.function.CheckedFunction;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nonnull;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.util.function.Function;
+import java.util.function.IntFunction;
+
+/**
+ * An abstract {@link Codec} to serialize/deserialize {@link String}
+ * using a {@link Charset} provided by subclasses.
+ */
+abstract class StringCodecBase implements Codec<String> {
+  static final Logger LOG = LoggerFactory.getLogger(StringCodecBase.class);
+
+  private final Charset charset;
+  private final boolean fixedLength;
+  private final int maxBytesPerChar;
+
+  StringCodecBase(Charset charset) {
+    this.charset = charset;
+
+    final CharsetEncoder encoder = charset.newEncoder();
+    final float max = encoder.maxBytesPerChar();
+    this.maxBytesPerChar = (int) max;
+    if (maxBytesPerChar != max) {
+      throw new ArithmeticException("Round off error in " + charset
+          + ": maxBytesPerChar = " + max + " is not an integer.");
+    }
+    this.fixedLength = max == encoder.averageBytesPerChar();
+  }
+
+  CharsetEncoder newEncoder() {
+    return charset.newEncoder()
+        .onMalformedInput(CodingErrorAction.REPORT)
+        .onUnmappableCharacter(CodingErrorAction.REPORT);
+  }
+
+  CharsetDecoder newDecoder() {
+    return charset.newDecoder()
+        .onMalformedInput(CodingErrorAction.REPORT)
+        .onUnmappableCharacter(CodingErrorAction.REPORT);
+  }
+
+  /**
+   * Is this a fixed-length {@link Codec}?
+   * <p>
+   * For a fixed-length {@link Codec},
+   * each character is encoded to the same number of bytes and
+   * {@link #getSerializedSizeUpperBound(String)} equals the serialized size.
+   */
+  public boolean isFixedLength() {
+    return fixedLength;
+  }
+
+  /**
+   * @return an upper bound, which can be obtained without encoding,
+   *         of the serialized size of the given {@link String}.
+   *         When {@link #isFixedLength()} is true,
+   *         the upper bound equals the serialized size.
+   */
+  @Override
+  public int getSerializedSizeUpperBound(String s) {
+    return maxBytesPerChar * s.length();
+  }
+
+  private <E extends Exception> CheckedFunction<ByteBuffer, Integer, E> encode(
+      String string, Integer serializedSize, Function<String, E> newE) {
+    return buffer -> {
+      final CoderResult result = newEncoder().encode(
+          CharBuffer.wrap(string), buffer, true);
+      if (result.isError()) {
+        throw newE.apply("Failed to encode with " + charset + ": " + result
+            + ", string=" + string);
+      }
+      final int remaining = buffer.flip().remaining();
+      if (serializedSize != null && serializedSize != remaining) {
+        throw newE.apply("Size mismatched: Expected size is " + serializedSize
+            + " but actual size is " + remaining + ", string=" + string);
+      }
+      return remaining;
+    };
+  }
+
+  String decode(ByteBuffer buffer) {
+    Runnable error = null;
+    try {
+      return newDecoder().decode(buffer.asReadOnlyBuffer()).toString();
+    } catch (Exception e) {
+      error = () -> LOG.warn("Failed to decode buffer with " + charset
+          + ", buffer = (hex) " + StringUtils.bytes2Hex(buffer), e);
+
+      // For compatibility, try decoding using StringUtils.
+      final String decoded = StringUtils.bytes2String(buffer, charset);
+      // Decoded successfully, update error message.
+      error = () -> LOG.warn("Decode (hex) " + StringUtils.bytes2Hex(buffer, 
20)
+          + "\n  Attempt failed : " + charset + " (see exception below)"
+          + "\n  Retry succeeded: decoded to " + decoded, e);
+      return decoded;
+    } finally {
+      if (error != null) {
+        error.run();
+      }
+    }
+  }
+
+  /** Encode the given {@link String} to a byte array. */
+  <E extends Exception> byte[] string2Bytes(String string,
+      Function<String, E> newE) throws E {
+    final int upperBound = getSerializedSizeUpperBound(string);
+    final Integer serializedSize = isFixedLength() ? upperBound : null;
+    final CheckedFunction<ByteBuffer, Integer, E> encoder
+        = encode(string, serializedSize, newE);
+
+    if (serializedSize != null) {
+      // When the serialized size is known, create an array
+      // and then wrap it as a buffer for encoding.
+      final byte[] array = new byte[serializedSize];
+      final Integer encoded = encoder.apply(ByteBuffer.wrap(array));
+      Preconditions.assertSame(serializedSize, encoded, "serializedSize");
+      return array;
+    } else {
+      // When the serialized size is unknown, allocate a larger buffer
+      // and then get an array.
+      try (CodecBuffer buffer = CodecBuffer.allocateHeap(upperBound)) {
+        buffer.putFromSource(encoder);
+
+        // require a buffer copying
+        // unless upperBound equals to the serialized size.
+        return buffer.getArray();
+      }
+    }
+  }
+
+  @Override
+  public boolean supportCodecBuffer() {
+    return true;
+  }
+
+  @Override
+  public CodecBuffer toCodecBuffer(@Nonnull String object,
+      IntFunction<CodecBuffer> allocator) throws IOException {
+    // allocate a larger buffer to avoid encoding twice.
+    final int upperBound = getSerializedSizeUpperBound(object);
+    final CodecBuffer buffer = allocator.apply(upperBound);
+    buffer.putFromSource(encode(object, null, IOException::new));
+    return buffer;
+  }
+
+  @Override
+  public String fromCodecBuffer(@Nonnull CodecBuffer buffer)
+      throws IOException {
+    return decode(buffer.asReadOnlyByteBuffer());
+  }
+
+  @Override
+  public byte[] toPersistedFormat(String object) throws IOException {
+    return string2Bytes(object, IOException::new);
+  }
+
+  @Override
+  public String fromPersistedFormat(byte[] bytes) {
+    return decode(ByteBuffer.wrap(bytes));
+  }
+
+  @Override
+  public String copyObject(String object) {
+    return object;
+  }
+}
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
index 9dd593c5a4..8853497e2c 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
@@ -182,7 +182,6 @@ public final class KeyValueContainerUtil {
   /**
    * Returns if there are no blocks in the container.
    * @param containerData Container to check
-   * @param conf configuration
    * @return true if the directory containing blocks is empty
    * @throws IOException
    */
@@ -369,8 +368,8 @@ public final class KeyValueContainerUtil {
         blockCount++;
         try {
           usedBytes += getBlockLength(blockIter.nextBlock());
-        } catch (IOException ex) {
-          LOG.error(errorMessage);
+        } catch (Exception ex) {
+          LOG.error(errorMessage, ex);
         }
       }
     }
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
index 51b5e6c271..745e1153da 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
@@ -22,7 +22,6 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition;
 import org.apache.hadoop.hdds.utils.db.DBDefinition;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
 import org.apache.hadoop.hdds.utils.db.LongCodec;
 import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
 import org.apache.hadoop.hdds.utils.db.Proto2Codec;
@@ -42,7 +41,7 @@ import static 
org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PRO
  * version 3, where the block data, metadata, and transactions which are to be
  * deleted are put in their own separate column families and with containerID
  * as key prefix.
- *
+ * <p>
  * Some key format illustrations for the column families:
  * - block_data:     containerID | blockID
  * - metadata:       containerID | #BLOCKCOUNT
@@ -50,7 +49,7 @@ import static 
org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PRO
  *                   ...
  * - deleted_blocks: containerID | blockID
  * - delete_txns:    containerID | TransactionID
- *
+ * <p>
  * The keys would be encoded in a fix-length encoding style in order to
  * utilize the "Prefix Seek" feature from Rocksdb to optimize seek.
  */
@@ -153,19 +152,19 @@ public class DatanodeSchemaThreeDBDefinition
   }
 
   public static int getContainerKeyPrefixLength() {
-    return FixedLengthStringUtils.string2Bytes(
+    return FixedLengthStringCodec.string2Bytes(
         getContainerKeyPrefix(0L)).length;
   }
 
   public static String getContainerKeyPrefix(long containerID) {
     // NOTE: Rocksdb normally needs a fixed length prefix.
-    return FixedLengthStringUtils.bytes2String(Longs.toByteArray(containerID))
+    return FixedLengthStringCodec.bytes2String(Longs.toByteArray(containerID))
         + separator;
   }
 
   public static byte[] getContainerKeyPrefixBytes(long containerID) {
     // NOTE: Rocksdb normally needs a fixed length prefix.
-    return FixedLengthStringUtils.string2Bytes(
+    return FixedLengthStringCodec.string2Bytes(
         getContainerKeyPrefix(containerID));
   }
 
@@ -180,7 +179,7 @@ public class DatanodeSchemaThreeDBDefinition
   public static long getContainerId(String key) {
     int index = getContainerKeyPrefixLength();
     String cid = key.substring(0, index);
-    return Longs.fromByteArray(FixedLengthStringUtils.string2Bytes(cid));
+    return Longs.fromByteArray(FixedLengthStringCodec.string2Bytes(cid));
   }
 
   private void setSeparator(String keySeparator) {
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
index 43739d2a33..ee8580defa 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
@@ -21,7 +21,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
 import org.apache.hadoop.hdds.utils.db.BatchOperation;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
+import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
 import org.apache.hadoop.hdds.utils.db.RDBStore;
 import org.apache.hadoop.hdds.utils.db.RocksDatabase;
 import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
@@ -187,9 +187,9 @@ public class DatanodeStoreSchemaThreeImpl extends 
AbstractDatanodeStore
             long endCId = Long.MIN_VALUE;
             for (LiveFileMetaData file: innerEntry.getValue()) {
               long firstCId = DatanodeSchemaThreeDBDefinition.getContainerId(
-                  FixedLengthStringUtils.bytes2String(file.smallestKey()));
+                  FixedLengthStringCodec.bytes2String(file.smallestKey()));
               long lastCId = DatanodeSchemaThreeDBDefinition.getContainerId(
-                  FixedLengthStringUtils.bytes2String(file.largestKey()));
+                  FixedLengthStringCodec.bytes2String(file.largestKey()));
               startCId = Math.min(firstCId, startCId);
               endCId = Math.max(lastCId, endCId);
             }
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
index 4271279437..2f1660f4d2 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
@@ -24,6 +24,8 @@ import org.apache.hadoop.hdds.utils.db.StringCodec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
+
 /**
  * Containers written using schema version 1 wrote unprefixed block ID keys
  * as longs, and metadata or prefixed block IDs as Strings. This was done
@@ -47,7 +49,7 @@ public final class SchemaOneKeyCodec implements Codec<String> 
{
   }
 
   @Override
-  public byte[] toPersistedFormat(String stringObject) {
+  public byte[] toPersistedFormat(String stringObject) throws IOException {
     try {
       // If the caller's string has no prefix, it should be stored as a long
       // to be encoded as a long to be consistent with the schema one
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
index cca0e0da29..2d4add8194 100644
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
+++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
@@ -18,44 +18,40 @@
  */
 package org.apache.hadoop.hdds.utils.db;
 
-import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 
 /**
- * Codec to convert a prefixed String to/from byte array.
- * The prefix has to be of fixed-length.
+ * A {@link Codec} to serialize/deserialize {@link String}
+ * using {@link StandardCharsets#ISO_8859_1},
+ * a fixed-length one-byte-per-character encoding,
+ * i.e. the serialized size equals to {@link String#length()}.
  */
-public final class FixedLengthStringCodec implements Codec<String> {
+public final class FixedLengthStringCodec extends StringCodecBase {
 
-  private static final Codec<String> INSTANCE = new FixedLengthStringCodec();
+  private static final FixedLengthStringCodec INSTANCE
+      = new FixedLengthStringCodec();
 
-  public static Codec<String> get() {
+  public static FixedLengthStringCodec get() {
     return INSTANCE;
   }
 
   private FixedLengthStringCodec() {
     // singleton
+    super(StandardCharsets.ISO_8859_1);
   }
 
-  @Override
-  public byte[] toPersistedFormat(String object) throws IOException {
-    if (object != null) {
-      return FixedLengthStringUtils.string2Bytes(object);
-    } else {
-      return null;
-    }
+  /**
+   * Encode the given {@link String} to a byte array.
+   * @throws IllegalStateException in case an encoding error occurs.
+   */
+  public static byte[] string2Bytes(String string) {
+    return get().string2Bytes(string, IllegalStateException::new);
   }
 
-  @Override
-  public String fromPersistedFormat(byte[] rawData) throws IOException {
-    if (rawData != null) {
-      return FixedLengthStringUtils.bytes2String(rawData);
-    } else {
-      return null;
-    }
-  }
-
-  @Override
-  public String copyObject(String object) {
-    return object;
+  /**
+   * Decode the given byte array to a {@link String}.
+   */
+  public static String bytes2String(byte[] bytes) {
+    return get().fromPersistedFormat(bytes);
   }
 }
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringUtils.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringUtils.java
deleted file mode 100644
index 31eee1c569..0000000000
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringUtils.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hdds.utils.db;
-
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.StandardCharsets;
-
-/**
- * String utility class for conversion between byte[] and string
- * which requires string to be of non-variable-length encoding(e.g. ASCII).
- * This is different from StringUtils which uses UTF-8 encoding which is
- * a variable-length encoding style.
- * This is mainly for FixedLengthStringCodec which requires a fixed-length
- * prefix.
- */
-public final class FixedLengthStringUtils {
-
-  private FixedLengthStringUtils() {
-  }
-
-  // An ASCII extension: https://en.wikipedia.org/wiki/ISO/IEC_8859-1
-  // Each character is encoded as a single eight-bit code value.
-  private static final String ASCII_CSN = StandardCharsets.ISO_8859_1.name();
-
-  public static String bytes2String(byte[] bytes) {
-    try {
-      return new String(bytes, 0, bytes.length, ASCII_CSN);
-    } catch (UnsupportedEncodingException e) {
-      throw new IllegalArgumentException(
-          "ISO_8859_1 encoding is not supported", e);
-    }
-  }
-
-  public static byte[] string2Bytes(String str) {
-    try {
-      return str.getBytes(ASCII_CSN);
-    } catch (UnsupportedEncodingException e) {
-      throw new IllegalArgumentException(
-          "ISO_8859_1 decoding is not supported", e);
-    }
-  }
-}
diff --git 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
index 531381714e..18c736914f 100644
--- 
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
+++ 
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
@@ -18,15 +18,14 @@
  */
 package org.apache.hadoop.hdds.utils.db;
 
-import java.util.function.IntFunction;
-import javax.annotation.Nonnull;
-
-import org.apache.hadoop.hdds.StringUtils;
+import java.nio.charset.StandardCharsets;
 
 /**
- * Codec to serialize/deserialize {@link String}.
+ * A {@link Codec} to serialize/deserialize {@link String}
+ * using {@link StandardCharsets#UTF_8},
+ * a variable-length character encoding.
  */
-public final class StringCodec implements Codec<String> {
+public final class StringCodec extends StringCodecBase {
   private static final StringCodec CODEC = new StringCodec();
 
   public static StringCodec get() {
@@ -35,45 +34,6 @@ public final class StringCodec implements Codec<String> {
 
   private StringCodec() {
     // singleton
-  }
-
-  @Override
-  public boolean supportCodecBuffer() {
-    return true;
-  }
-
-  @Override
-  public CodecBuffer toCodecBuffer(@Nonnull String object,
-      IntFunction<CodecBuffer> allocator) {
-    final byte[] array = toPersistedFormat(object);
-    return allocator.apply(array.length).put(array);
-  }
-
-  @Override
-  public String fromCodecBuffer(@Nonnull CodecBuffer buffer) {
-    return StringUtils.bytes2String(buffer.asReadOnlyByteBuffer());
-  }
-
-  @Override
-  public byte[] toPersistedFormat(String object) {
-    if (object != null) {
-      return StringUtils.string2Bytes(object);
-    } else {
-      return null;
-    }
-  }
-
-  @Override
-  public String fromPersistedFormat(byte[] rawData) {
-    if (rawData != null) {
-      return StringUtils.bytes2String(rawData);
-    } else {
-      return null;
-    }
-  }
-
-  @Override
-  public String copyObject(String object) {
-    return object;
+    super(StandardCharsets.UTF_8);
   }
 }
diff --git 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
index 52aec7e835..7060a30daa 100644
--- 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
+++ 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
@@ -22,6 +22,7 @@ import com.google.common.primitives.Longs;
 import com.google.common.primitives.Shorts;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.function.Executable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -32,6 +33,7 @@ import java.lang.ref.WeakReference;
 import java.nio.ByteBuffer;
 import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.Consumer;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
@@ -147,6 +149,7 @@ public final class TestCodec {
 
   @Test
   public void testStringCodec() throws Exception {
+    Assertions.assertFalse(StringCodec.get().isFixedLength());
     runTestStringCodec("");
 
     for (int i = 0; i < NUM_LOOPS; i++) {
@@ -189,6 +192,51 @@ public final class TestCodec {
     return serializedSize;
   }
 
+  @Test
+  public void testFixedLengthStringCodec() throws Exception {
+    Assertions.assertTrue(FixedLengthStringCodec.get().isFixedLength());
+    runTestFixedLengthStringCodec("");
+
+    for (int i = 0; i < NUM_LOOPS; i++) {
+      final String original = "test" + ThreadLocalRandom.current().nextLong();
+      runTestFixedLengthStringCodec(original);
+    }
+
+    final String alphabets = "AbcdEfghIjklmnOpqrstUvwxyz";
+    for (int i = 0; i < NUM_LOOPS; i++) {
+      final String original = i == 0 ? alphabets : alphabets.substring(0, i);
+      runTestFixedLengthStringCodec(original);
+    }
+
+
+    final String multiByteChars = "Ozone 是 Hadoop 的分布式对象存储系统,具有易扩展和冗余存储的特点。";
+    Assertions.assertThrows(IOException.class,
+        tryCatch(() -> runTestFixedLengthStringCodec(multiByteChars)));
+    Assertions.assertThrows(IllegalStateException.class,
+        tryCatch(() -> FixedLengthStringCodec.string2Bytes(multiByteChars)));
+
+    gc();
+  }
+
+  static Executable tryCatch(Executable executable) {
+    return tryCatch(executable, t -> LOG.info("Good!", t));
+  }
+
+  static Executable tryCatch(Executable executable, Consumer<Throwable> log) {
+    return () -> {
+      try {
+        executable.execute();
+      } catch (Throwable t) {
+        log.accept(t);
+        throw t;
+      }
+    };
+  }
+
+  static void runTestFixedLengthStringCodec(String original) throws Exception {
+    runTest(FixedLengthStringCodec.get(), original, original.length());
+  }
+
   @Test
   public void testUuidCodec() throws Exception {
     final int size = UuidCodec.getSerializedSize();
diff --git 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringUtils.java
 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringCodec.java
similarity index 86%
rename from 
hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringUtils.java
rename to 
hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringCodec.java
index b6d1e6a664..e71eae6337 100644
--- 
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringUtils.java
+++ 
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringCodec.java
@@ -23,9 +23,9 @@ import org.junit.Test;
 import static org.junit.Assert.assertEquals;
 
 /**
- * Test for class FixedLengthStringUtils.
+ * Test for class {@link FixedLengthStringCodec}.
  */
-public class TestFixedLengthStringUtils {
+public class TestFixedLengthStringCodec {
 
   @Test
   public void testStringEncodeAndDecode() {
@@ -35,10 +35,10 @@ public class TestFixedLengthStringUtils {
     };
 
     for (long containerID : testContainerIDs) {
-      String containerPrefix = FixedLengthStringUtils.bytes2String(
+      String containerPrefix = FixedLengthStringCodec.bytes2String(
           Longs.toByteArray(containerID));
       long decodedContainerID = Longs.fromByteArray(
-          FixedLengthStringUtils.string2Bytes(containerPrefix));
+          FixedLengthStringCodec.string2Bytes(containerPrefix));
       assertEquals(containerID, decodedContainerID);
     }
   }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
index 338ce144c7..be4f64bf8c 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.utils.db.DBStore;
 import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
+import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.ozone.ClientVersion;
 import org.apache.hadoop.ozone.OzoneConsts;
@@ -303,7 +303,7 @@ public class TestLDBCli {
           if (schemaV3) {
             String dbKeyStr = DatanodeSchemaThreeDBDefinition
                 .getContainerKeyPrefix(cid) + blockId;
-            dbKey = FixedLengthStringUtils.string2Bytes(dbKeyStr);
+            dbKey = FixedLengthStringCodec.string2Bytes(dbKeyStr);
             // Schema V3 ldb scan output key is "containerId: blockId"
             mapKey = cid + keySeparatorSchemaV3 + blockId;
           } else {
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index fc215d2fb3..f4fc52fc12 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -1698,7 +1698,7 @@ public class KeyManagerImpl implements KeyManager {
   }
 
   private String getNextGreaterString(String volumeName, String bucketName,
-      String keyPrefix) {
+      String keyPrefix) throws IOException {
     // Increment the last character of the string and return the new ozone key.
     Preconditions.checkArgument(!Strings.isNullOrEmpty(keyPrefix),
         "Key prefix is null or empty");
diff --git 
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java 
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
index 9146abfbc9..eba61c9ddb 100644
--- 
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
+++ 
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
 import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition;
 import org.apache.hadoop.hdds.utils.db.DBDefinition;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
+import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
 import org.apache.hadoop.hdds.utils.db.LongCodec;
 import org.apache.hadoop.hdds.utils.db.RocksDatabase;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedReadOptions;
@@ -225,7 +225,7 @@ public class DBScanner implements Callable<Void>, 
SubcommandWithParent {
           String cid = keyStr.substring(0, index);
           String blockId = keyStr.substring(index);
           sb.append(gson.toJson(LongCodec.get().fromPersistedFormat(
-              FixedLengthStringUtils.string2Bytes(cid)) +
+              FixedLengthStringCodec.string2Bytes(cid)) +
               keySeparatorSchemaV3 +
               blockId));
         } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to