This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 329308f18a HDDS-8724. FixedLengthStringCodec may silently replace
unsupported characters. (#4801)
329308f18a is described below
commit 329308f18ab3e0fe76480f46489de0b39ca9a76a
Author: Tsz-Wo Nicholas Sze <[email protected]>
AuthorDate: Mon Jun 5 18:36:58 2023 +0800
HDDS-8724. FixedLengthStringCodec may silently replace unsupported
characters. (#4801)
---
.../java/org/apache/hadoop/hdds/StringUtils.java | 21 ++-
.../org/apache/hadoop/hdds/utils/db/Codec.java | 8 +
.../apache/hadoop/hdds/utils/db/CodecBuffer.java | 70 ++++----
.../hadoop/hdds/utils/db/StringCodecBase.java | 200 +++++++++++++++++++++
.../keyvalue/helpers/KeyValueContainerUtil.java | 5 +-
.../metadata/DatanodeSchemaThreeDBDefinition.java | 13 +-
.../metadata/DatanodeStoreSchemaThreeImpl.java | 6 +-
.../container/metadata/SchemaOneKeyCodec.java | 4 +-
.../hdds/utils/db/FixedLengthStringCodec.java | 46 +++--
.../hdds/utils/db/FixedLengthStringUtils.java | 57 ------
.../apache/hadoop/hdds/utils/db/StringCodec.java | 52 +-----
.../org/apache/hadoop/hdds/utils/db/TestCodec.java | 48 +++++
...gUtils.java => TestFixedLengthStringCodec.java} | 8 +-
.../org/apache/hadoop/ozone/om/TestLDBCli.java | 4 +-
.../org/apache/hadoop/ozone/om/KeyManagerImpl.java | 2 +-
.../org/apache/hadoop/ozone/debug/DBScanner.java | 4 +-
16 files changed, 365 insertions(+), 183 deletions(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
index 4f95839f05..3cf7477230 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/StringUtils.java
@@ -63,7 +63,26 @@ public final class StringUtils {
}
public static String bytes2String(ByteBuffer bytes) {
- return Unpooled.wrappedBuffer(bytes.asReadOnlyBuffer()).toString(UTF8);
+ return bytes2String(bytes, UTF8);
+ }
+
+ public static String bytes2String(ByteBuffer bytes, Charset charset) {
+ return Unpooled.wrappedBuffer(bytes.asReadOnlyBuffer()).toString(charset);
+ }
+
+ public static String bytes2Hex(ByteBuffer buffer, int max) {
+ buffer = buffer.asReadOnlyBuffer();
+ final int remaining = buffer.remaining();
+ final int n = Math.min(max, remaining);
+ final StringBuilder builder = new StringBuilder(3 * n);
+ for (int i = 0; i < n; i++) {
+ builder.append(String.format("%02X ", buffer.get()));
+ }
+ return builder + (remaining > max ? "..." : "");
+ }
+
+ public static String bytes2Hex(ByteBuffer buffer) {
+ return bytes2Hex(buffer, buffer.remaining());
}
/**
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
index 301665cb63..ad815e1e49 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/Codec.java
@@ -44,6 +44,14 @@ public interface Codec<T> {
return false;
}
+ /**
+ * @return an upper bound, which should be obtained without serialization,
+ * of the serialized size of the given object.
+ */
+ default int getSerializedSizeUpperBound(T object) {
+ throw new UnsupportedOperationException();
+ }
+
/**
* Serialize the given object to bytes.
*
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
index 93f2ed716d..c8550d59c7 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/CodecBuffer.java
@@ -34,6 +34,7 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.IntFunction;
import java.util.function.ToIntFunction;
/**
@@ -45,46 +46,43 @@ public final class CodecBuffer implements AutoCloseable {
private static final ByteBufAllocator POOL
= PooledByteBufAllocator.DEFAULT;
+ private static final IntFunction<ByteBuf> POOL_DIRECT = c -> c >= 0
+ ? POOL.directBuffer(c, c) // allocate exact size
+ : POOL.directBuffer(-c); // allocate a resizable buffer
+ private static final IntFunction<ByteBuf> POOL_HEAP = c -> c >= 0
+ ? POOL.heapBuffer(c, c) // allocate exact size
+ : POOL.heapBuffer(-c); // allocate a resizable buffer
-
/**
- * Allocate a direct buffer.
- * When the given capacity is non-negative, allocate a buffer by setting
- * the initial capacity and the maximum capacity to the given capacity.
- * When the given capacity is negative, allocate a buffer
- * by setting only the initial capacity to the absolute value of it
- * and then the buffer's capacity can be increased if necessary.
+ * Allocate a buffer using the given allocator.
+ *
+ * @param allocator Take a capacity parameter and return an allocated buffer.
+ * When the capacity is non-negative,
+ * allocate a buffer by setting the initial capacity
+ * and the maximum capacity to the given capacity.
+ * When the capacity is negative,
+ * allocate a buffer by setting only the initial capacity
+ * to the absolute value of it and, as a result,
+ * the buffer's capacity can be increased if necessary.
+ */
+ static CodecBuffer allocate(int capacity, IntFunction<ByteBuf> allocator) {
+ return new CodecBuffer(allocator.apply(capacity));
+ }
+
+ /**
+ * Allocate a pooled direct buffer.
+ * @see #allocate(int, IntFunction)
*/
public static CodecBuffer allocateDirect(int capacity) {
- final ByteBuf buf;
- if (capacity >= 0) {
- // allocate exact size
- buf = POOL.directBuffer(capacity, capacity);
- } else {
- // allocate a resizable buffer
- buf = POOL.directBuffer(-capacity);
- }
- return new CodecBuffer(buf);
+ return allocate(capacity, POOL_DIRECT);
}
/**
- * Allocate a heap buffer.
- * When the given capacity is non-negative, allocate a buffer by setting
- * the initial capacity and the maximum capacity to the given capacity.
- * When the given capacity is negative, allocate a buffer
- * by setting only the initial capacity to the absolute value of it
- * and then the buffer's capacity can be increased if necessary.
+ * Allocate a pooled heap buffer.
+ * @see #allocate(int, IntFunction)
*/
public static CodecBuffer allocateHeap(int capacity) {
- final ByteBuf buf;
- if (capacity >= 0) {
- // allocate exact size
- buf = POOL.heapBuffer(capacity, capacity);
- } else {
- // allocate a resizable buffer
- buf = POOL.heapBuffer(-capacity);
- }
- return new CodecBuffer(buf);
+ return allocate(capacity, POOL_HEAP);
}
/** Wrap the given array. */
@@ -189,6 +187,16 @@ public final class CodecBuffer implements AutoCloseable {
return buf.nioBuffer().asReadOnlyBuffer();
}
+ /**
+ * @return a new array containing the readable bytes.
+ * @see #readableBytes()
+ */
+ public byte[] getArray() {
+ final byte[] array = new byte[readableBytes()];
+ buf.readBytes(array);
+ return array;
+ }
+
/** @return an {@link InputStream} reading from this buffer. */
public InputStream getInputStream() {
return new ByteBufInputStream(buf.duplicate());
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
new file mode 100644
index 0000000000..799b7a9381
--- /dev/null
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodecBase.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.hadoop.hdds.utils.db;
+
+import org.apache.hadoop.hdds.StringUtils;
+import org.apache.ratis.util.Preconditions;
+import org.apache.ratis.util.function.CheckedFunction;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nonnull;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.util.function.Function;
+import java.util.function.IntFunction;
+
+/**
+ * An abstract {@link Codec} to serialize/deserialize {@link String}
+ * using a {@link Charset} provided by subclasses.
+ */
+abstract class StringCodecBase implements Codec<String> {
+ static final Logger LOG = LoggerFactory.getLogger(StringCodecBase.class);
+
+ private final Charset charset;
+ private final boolean fixedLength;
+ private final int maxBytesPerChar;
+
+ StringCodecBase(Charset charset) {
+ this.charset = charset;
+
+ final CharsetEncoder encoder = charset.newEncoder();
+ final float max = encoder.maxBytesPerChar();
+ this.maxBytesPerChar = (int) max;
+ if (maxBytesPerChar != max) {
+ throw new ArithmeticException("Round off error in " + charset
+ + ": maxBytesPerChar = " + max + " is not an integer.");
+ }
+ this.fixedLength = max == encoder.averageBytesPerChar();
+ }
+
+ CharsetEncoder newEncoder() {
+ return charset.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+
+ CharsetDecoder newDecoder() {
+ return charset.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+
+ /**
+ * Is this a fixed-length {@link Codec}?
+ * <p>
+ * For a fixed-length {@link Codec},
+ * each character is encoded to the same number of bytes and
+ * {@link #getSerializedSizeUpperBound(String)} equals to the serialized size.
+ */
+ public boolean isFixedLength() {
+ return fixedLength;
+ }
+
+ /**
+ * @return an upper bound, which can be obtained without encoding,
+ * of the serialized size of the given {@link String}.
+ * When {@link #isFixedLength()} is true,
+ * the upper bound equals to the serialized size.
+ */
+ @Override
+ public int getSerializedSizeUpperBound(String s) {
+ return maxBytesPerChar * s.length();
+ }
+
+ private <E extends Exception> CheckedFunction<ByteBuffer, Integer, E> encode(
+ String string, Integer serializedSize, Function<String, E> newE) {
+ return buffer -> {
+ final CoderResult result = newEncoder().encode(
+ CharBuffer.wrap(string), buffer, true);
+ if (result.isError()) {
+ throw newE.apply("Failed to encode with " + charset + ": " + result
+ + ", string=" + string);
+ }
+ final int remaining = buffer.flip().remaining();
+ if (serializedSize != null && serializedSize != remaining) {
+ throw newE.apply("Size mismatched: Expected size is " + serializedSize
+ + " but actual size is " + remaining + ", string=" + string);
+ }
+ return remaining;
+ };
+ }
+
+ String decode(ByteBuffer buffer) {
+ Runnable error = null;
+ try {
+ return newDecoder().decode(buffer.asReadOnlyBuffer()).toString();
+ } catch (Exception e) {
+ error = () -> LOG.warn("Failed to decode buffer with " + charset
+ + ", buffer = (hex) " + StringUtils.bytes2Hex(buffer), e);
+
+ // For compatibility, try decoding using StringUtils.
+ final String decoded = StringUtils.bytes2String(buffer, charset);
+ // Decoded successfully, update error message.
+      error = () -> LOG.warn("Decode (hex) " + StringUtils.bytes2Hex(buffer, 20)
+ + "\n Attempt failed : " + charset + " (see exception below)"
+ + "\n Retry succeeded: decoded to " + decoded, e);
+ return decoded;
+ } finally {
+ if (error != null) {
+ error.run();
+ }
+ }
+ }
+
+ /** Encode the given {@link String} to a byte array. */
+ <E extends Exception> byte[] string2Bytes(String string,
+ Function<String, E> newE) throws E {
+ final int upperBound = getSerializedSizeUpperBound(string);
+ final Integer serializedSize = isFixedLength() ? upperBound : null;
+ final CheckedFunction<ByteBuffer, Integer, E> encoder
+ = encode(string, serializedSize, newE);
+
+ if (serializedSize != null) {
+ // When the serialized size is known, create an array
+ // and then wrap it as a buffer for encoding.
+ final byte[] array = new byte[serializedSize];
+ final Integer encoded = encoder.apply(ByteBuffer.wrap(array));
+ Preconditions.assertSame(serializedSize, encoded, "serializedSize");
+ return array;
+ } else {
+ // When the serialized size is unknown, allocate a larger buffer
+ // and then get an array.
+ try (CodecBuffer buffer = CodecBuffer.allocateHeap(upperBound)) {
+ buffer.putFromSource(encoder);
+
+ // require a buffer copying
+ // unless upperBound equals to the serialized size.
+ return buffer.getArray();
+ }
+ }
+ }
+
+ @Override
+ public boolean supportCodecBuffer() {
+ return true;
+ }
+
+ @Override
+ public CodecBuffer toCodecBuffer(@Nonnull String object,
+ IntFunction<CodecBuffer> allocator) throws IOException {
+ // allocate a larger buffer to avoid encoding twice.
+ final int upperBound = getSerializedSizeUpperBound(object);
+ final CodecBuffer buffer = allocator.apply(upperBound);
+ buffer.putFromSource(encode(object, null, IOException::new));
+ return buffer;
+ }
+
+ @Override
+ public String fromCodecBuffer(@Nonnull CodecBuffer buffer)
+ throws IOException {
+ return decode(buffer.asReadOnlyByteBuffer());
+ }
+
+ @Override
+ public byte[] toPersistedFormat(String object) throws IOException {
+ return string2Bytes(object, IOException::new);
+ }
+
+ @Override
+ public String fromPersistedFormat(byte[] bytes) {
+ return decode(ByteBuffer.wrap(bytes));
+ }
+
+ @Override
+ public String copyObject(String object) {
+ return object;
+ }
+}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
index 9dd593c5a4..8853497e2c 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java
@@ -182,7 +182,6 @@ public final class KeyValueContainerUtil {
/**
* Returns if there are no blocks in the container.
* @param containerData Container to check
- * @param conf configuration
* @return true if the directory containing blocks is empty
* @throws IOException
*/
@@ -369,8 +368,8 @@ public final class KeyValueContainerUtil {
blockCount++;
try {
usedBytes += getBlockLength(blockIter.nextBlock());
- } catch (IOException ex) {
- LOG.error(errorMessage);
+ } catch (Exception ex) {
+ LOG.error(errorMessage, ex);
}
}
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
index 51b5e6c271..745e1153da 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaThreeDBDefinition.java
@@ -22,7 +22,6 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition;
import org.apache.hadoop.hdds.utils.db.DBDefinition;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
import org.apache.hadoop.hdds.utils.db.LongCodec;
import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
import org.apache.hadoop.hdds.utils.db.Proto2Codec;
@@ -42,7 +41,7 @@ import static
org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PRO
* version 3, where the block data, metadata, and transactions which are to be
* deleted are put in their own separate column families and with containerID
* as key prefix.
- *
+ * <p>
* Some key format illustrations for the column families:
* - block_data: containerID | blockID
* - metadata: containerID | #BLOCKCOUNT
@@ -50,7 +49,7 @@ import static
org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PRO
* ...
* - deleted_blocks: containerID | blockID
* - delete_txns: containerID | TransactionID
- *
+ * <p>
* The keys would be encoded in a fix-length encoding style in order to
* utilize the "Prefix Seek" feature from Rocksdb to optimize seek.
*/
@@ -153,19 +152,19 @@ public class DatanodeSchemaThreeDBDefinition
}
public static int getContainerKeyPrefixLength() {
- return FixedLengthStringUtils.string2Bytes(
+ return FixedLengthStringCodec.string2Bytes(
getContainerKeyPrefix(0L)).length;
}
public static String getContainerKeyPrefix(long containerID) {
// NOTE: Rocksdb normally needs a fixed length prefix.
- return FixedLengthStringUtils.bytes2String(Longs.toByteArray(containerID))
+ return FixedLengthStringCodec.bytes2String(Longs.toByteArray(containerID))
+ separator;
}
public static byte[] getContainerKeyPrefixBytes(long containerID) {
// NOTE: Rocksdb normally needs a fixed length prefix.
- return FixedLengthStringUtils.string2Bytes(
+ return FixedLengthStringCodec.string2Bytes(
getContainerKeyPrefix(containerID));
}
@@ -180,7 +179,7 @@ public class DatanodeSchemaThreeDBDefinition
public static long getContainerId(String key) {
int index = getContainerKeyPrefixLength();
String cid = key.substring(0, index);
- return Longs.fromByteArray(FixedLengthStringUtils.string2Bytes(cid));
+ return Longs.fromByteArray(FixedLengthStringCodec.string2Bytes(cid));
}
private void setSeparator(String keySeparator) {
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
index 43739d2a33..ee8580defa 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaThreeImpl.java
@@ -21,7 +21,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
import org.apache.hadoop.hdds.utils.db.BatchOperation;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
+import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
import org.apache.hadoop.hdds.utils.db.RDBStore;
import org.apache.hadoop.hdds.utils.db.RocksDatabase;
import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
@@ -187,9 +187,9 @@ public class DatanodeStoreSchemaThreeImpl extends AbstractDatanodeStore
long endCId = Long.MIN_VALUE;
for (LiveFileMetaData file: innerEntry.getValue()) {
long firstCId = DatanodeSchemaThreeDBDefinition.getContainerId(
- FixedLengthStringUtils.bytes2String(file.smallestKey()));
+ FixedLengthStringCodec.bytes2String(file.smallestKey()));
long lastCId = DatanodeSchemaThreeDBDefinition.getContainerId(
- FixedLengthStringUtils.bytes2String(file.largestKey()));
+ FixedLengthStringCodec.bytes2String(file.largestKey()));
startCId = Math.min(firstCId, startCId);
endCId = Math.max(lastCId, endCId);
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
index 4271279437..2f1660f4d2 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/SchemaOneKeyCodec.java
@@ -24,6 +24,8 @@ import org.apache.hadoop.hdds.utils.db.StringCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
/**
* Containers written using schema version 1 wrote unprefixed block ID keys
* as longs, and metadata or prefixed block IDs as Strings. This was done
@@ -47,7 +47,7 @@ public final class SchemaOneKeyCodec implements Codec<String> {
}
@Override
- public byte[] toPersistedFormat(String stringObject) {
+ public byte[] toPersistedFormat(String stringObject) throws IOException {
try {
// If the caller's string has no prefix, it should be stored as a long
// to be encoded as a long to be consistent with the schema one
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
index cca0e0da29..2d4add8194 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringCodec.java
@@ -18,44 +18,40 @@
*/
package org.apache.hadoop.hdds.utils.db;
-import java.io.IOException;
+import java.nio.charset.StandardCharsets;
/**
- * Codec to convert a prefixed String to/from byte array.
- * The prefix has to be of fixed-length.
+ * A {@link Codec} to serialize/deserialize {@link String}
+ * using {@link StandardCharsets#ISO_8859_1},
+ * a fixed-length one-byte-per-character encoding,
+ * i.e. the serialized size equals to {@link String#length()}.
*/
-public final class FixedLengthStringCodec implements Codec<String> {
+public final class FixedLengthStringCodec extends StringCodecBase {
- private static final Codec<String> INSTANCE = new FixedLengthStringCodec();
+ private static final FixedLengthStringCodec INSTANCE
+ = new FixedLengthStringCodec();
- public static Codec<String> get() {
+ public static FixedLengthStringCodec get() {
return INSTANCE;
}
private FixedLengthStringCodec() {
// singleton
+ super(StandardCharsets.ISO_8859_1);
}
- @Override
- public byte[] toPersistedFormat(String object) throws IOException {
- if (object != null) {
- return FixedLengthStringUtils.string2Bytes(object);
- } else {
- return null;
- }
+ /**
+ * Encode the given {@link String} to a byte array.
+ * @throws IllegalStateException in case an encoding error occurs.
+ */
+ public static byte[] string2Bytes(String string) {
+ return get().string2Bytes(string, IllegalStateException::new);
}
- @Override
- public String fromPersistedFormat(byte[] rawData) throws IOException {
- if (rawData != null) {
- return FixedLengthStringUtils.bytes2String(rawData);
- } else {
- return null;
- }
- }
-
- @Override
- public String copyObject(String object) {
- return object;
+ /**
+ * Decode the given byte array to a {@link String}.
+ */
+ public static String bytes2String(byte[] bytes) {
+ return get().fromPersistedFormat(bytes);
}
}
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringUtils.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringUtils.java
deleted file mode 100644
index 31eee1c569..0000000000
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/FixedLengthStringUtils.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hdds.utils.db;
-
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.StandardCharsets;
-
-/**
- * String utility class for conversion between byte[] and string
- * which requires string to be of non-variable-length encoding(e.g. ASCII).
- * This is different from StringUtils which uses UTF-8 encoding which is
- * a variable-length encoding style.
- * This is mainly for FixedLengthStringCodec which requires a fixed-length
- * prefix.
- */
-public final class FixedLengthStringUtils {
-
- private FixedLengthStringUtils() {
- }
-
- // An ASCII extension: https://en.wikipedia.org/wiki/ISO/IEC_8859-1
- // Each character is encoded as a single eight-bit code value.
- private static final String ASCII_CSN = StandardCharsets.ISO_8859_1.name();
-
- public static String bytes2String(byte[] bytes) {
- try {
- return new String(bytes, 0, bytes.length, ASCII_CSN);
- } catch (UnsupportedEncodingException e) {
- throw new IllegalArgumentException(
- "ISO_8859_1 encoding is not supported", e);
- }
- }
-
- public static byte[] string2Bytes(String str) {
- try {
- return str.getBytes(ASCII_CSN);
- } catch (UnsupportedEncodingException e) {
- throw new IllegalArgumentException(
- "ISO_8859_1 decoding is not supported", e);
- }
- }
-}
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
index 531381714e..18c736914f 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/StringCodec.java
@@ -18,15 +18,14 @@
*/
package org.apache.hadoop.hdds.utils.db;
-import java.util.function.IntFunction;
-import javax.annotation.Nonnull;
-
-import org.apache.hadoop.hdds.StringUtils;
+import java.nio.charset.StandardCharsets;
/**
- * Codec to serialize/deserialize {@link String}.
+ * A {@link Codec} to serialize/deserialize {@link String}
+ * using {@link StandardCharsets#UTF_8},
+ * a variable-length character encoding.
*/
-public final class StringCodec implements Codec<String> {
+public final class StringCodec extends StringCodecBase {
private static final StringCodec CODEC = new StringCodec();
public static StringCodec get() {
@@ -35,45 +34,6 @@ public final class StringCodec implements Codec<String> {
private StringCodec() {
// singleton
- }
-
- @Override
- public boolean supportCodecBuffer() {
- return true;
- }
-
- @Override
- public CodecBuffer toCodecBuffer(@Nonnull String object,
- IntFunction<CodecBuffer> allocator) {
- final byte[] array = toPersistedFormat(object);
- return allocator.apply(array.length).put(array);
- }
-
- @Override
- public String fromCodecBuffer(@Nonnull CodecBuffer buffer) {
- return StringUtils.bytes2String(buffer.asReadOnlyByteBuffer());
- }
-
- @Override
- public byte[] toPersistedFormat(String object) {
- if (object != null) {
- return StringUtils.string2Bytes(object);
- } else {
- return null;
- }
- }
-
- @Override
- public String fromPersistedFormat(byte[] rawData) {
- if (rawData != null) {
- return StringUtils.bytes2String(rawData);
- } else {
- return null;
- }
- }
-
- @Override
- public String copyObject(String object) {
- return object;
+ super(StandardCharsets.UTF_8);
}
}
diff --git
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
index 52aec7e835..7060a30daa 100644
---
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
+++
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestCodec.java
@@ -22,6 +22,7 @@ import com.google.common.primitives.Longs;
import com.google.common.primitives.Shorts;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.function.Executable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -32,6 +33,7 @@ import java.lang.ref.WeakReference;
import java.nio.ByteBuffer;
import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.Consumer;
import static java.nio.charset.StandardCharsets.UTF_8;
@@ -147,6 +149,7 @@ public final class TestCodec {
@Test
public void testStringCodec() throws Exception {
+ Assertions.assertFalse(StringCodec.get().isFixedLength());
runTestStringCodec("");
for (int i = 0; i < NUM_LOOPS; i++) {
@@ -189,6 +192,51 @@ public final class TestCodec {
return serializedSize;
}
+ @Test
+ public void testFixedLengthStringCodec() throws Exception {
+ Assertions.assertTrue(FixedLengthStringCodec.get().isFixedLength());
+ runTestFixedLengthStringCodec("");
+
+ for (int i = 0; i < NUM_LOOPS; i++) {
+ final String original = "test" + ThreadLocalRandom.current().nextLong();
+ runTestFixedLengthStringCodec(original);
+ }
+
+ final String alphabets = "AbcdEfghIjklmnOpqrstUvwxyz";
+ for (int i = 0; i < NUM_LOOPS; i++) {
+ final String original = i == 0 ? alphabets : alphabets.substring(0, i);
+ runTestFixedLengthStringCodec(original);
+ }
+
+
+ final String multiByteChars = "Ozone 是 Hadoop 的分布式对象存储系统,具有易扩展和冗余存储的特点。";
+ Assertions.assertThrows(IOException.class,
+ tryCatch(() -> runTestFixedLengthStringCodec(multiByteChars)));
+ Assertions.assertThrows(IllegalStateException.class,
+ tryCatch(() -> FixedLengthStringCodec.string2Bytes(multiByteChars)));
+
+ gc();
+ }
+
+ static Executable tryCatch(Executable executable) {
+ return tryCatch(executable, t -> LOG.info("Good!", t));
+ }
+
+ static Executable tryCatch(Executable executable, Consumer<Throwable> log) {
+ return () -> {
+ try {
+ executable.execute();
+ } catch (Throwable t) {
+ log.accept(t);
+ throw t;
+ }
+ };
+ }
+
+ static void runTestFixedLengthStringCodec(String original) throws Exception {
+ runTest(FixedLengthStringCodec.get(), original, original.length());
+ }
+
@Test
public void testUuidCodec() throws Exception {
final int size = UuidCodec.getSerializedSize();
diff --git
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringUtils.java
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringCodec.java
similarity index 86%
rename from
hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringUtils.java
rename to
hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringCodec.java
index b6d1e6a664..e71eae6337 100644
---
a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringUtils.java
+++
b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestFixedLengthStringCodec.java
@@ -23,9 +23,9 @@ import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
- * Test for class FixedLengthStringUtils.
+ * Test for class {@link FixedLengthStringCodec}.
*/
-public class TestFixedLengthStringUtils {
+public class TestFixedLengthStringCodec {
@Test
public void testStringEncodeAndDecode() {
@@ -35,10 +35,10 @@ public class TestFixedLengthStringUtils {
};
for (long containerID : testContainerIDs) {
- String containerPrefix = FixedLengthStringUtils.bytes2String(
+ String containerPrefix = FixedLengthStringCodec.bytes2String(
Longs.toByteArray(containerID));
long decodedContainerID = Longs.fromByteArray(
- FixedLengthStringUtils.string2Bytes(containerPrefix));
+ FixedLengthStringCodec.string2Bytes(containerPrefix));
assertEquals(containerID, decodedContainerID);
}
}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
index 338ce144c7..be4f64bf8c 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
+import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.ClientVersion;
import org.apache.hadoop.ozone.OzoneConsts;
@@ -303,7 +303,7 @@ public class TestLDBCli {
if (schemaV3) {
String dbKeyStr = DatanodeSchemaThreeDBDefinition
.getContainerKeyPrefix(cid) + blockId;
- dbKey = FixedLengthStringUtils.string2Bytes(dbKeyStr);
+ dbKey = FixedLengthStringCodec.string2Bytes(dbKeyStr);
// Schema V3 ldb scan output key is "containerId: blockId"
mapKey = cid + keySeparatorSchemaV3 + blockId;
} else {
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index fc215d2fb3..f4fc52fc12 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -1698,7 +1698,7 @@ public class KeyManagerImpl implements KeyManager {
}
private String getNextGreaterString(String volumeName, String bucketName,
- String keyPrefix) {
+ String keyPrefix) throws IOException {
// Increment the last character of the string and return the new ozone key.
Preconditions.checkArgument(!Strings.isNullOrEmpty(keyPrefix),
"Key prefix is null or empty");
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
index 9146abfbc9..eba61c9ddb 100644
---
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DBScanner.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition;
import org.apache.hadoop.hdds.utils.db.DBDefinition;
-import org.apache.hadoop.hdds.utils.db.FixedLengthStringUtils;
+import org.apache.hadoop.hdds.utils.db.FixedLengthStringCodec;
import org.apache.hadoop.hdds.utils.db.LongCodec;
import org.apache.hadoop.hdds.utils.db.RocksDatabase;
import org.apache.hadoop.hdds.utils.db.managed.ManagedReadOptions;
@@ -225,7 +225,7 @@ public class DBScanner implements Callable<Void>,
SubcommandWithParent {
String cid = keyStr.substring(0, index);
String blockId = keyStr.substring(index);
sb.append(gson.toJson(LongCodec.get().fromPersistedFormat(
- FixedLengthStringUtils.string2Bytes(cid)) +
+ FixedLengthStringCodec.string2Bytes(cid)) +
keySeparatorSchemaV3 +
blockId));
} else {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]