hemantk-12 commented on code in PR #4584: URL: https://github.com/apache/ozone/pull/4584#discussion_r1191591228
########## hadoop-hdds/rocks-native/src/test/java/org/apache/hadoop/hdds/utils/db/managed/TestManagedSSTDumpIterator.java: ########## @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.utils.db.managed; + +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** + * Test for ManagedSSTDumpIterator. 
+ */ +public class TestManagedSSTDumpIterator { + + private void testSSTDumpIteratorWithKeys( Review Comment: nit: I think it would look nicer if done this way. There is not need to implement custom `ArgumentsProvider`. ```suggestion private static Stream<Arguments> sstDumpToolScenarios() { return Stream.of( Arguments.of("'key%1$d=>", "%1$dvalue'"), Arguments.of("key%1$d", "%1$dvalue%1$d"), Arguments.of("'key%1$d", "%1$d'value%1$d'"), Arguments.of("'key%1$d", "%1$d'value%1$d'"), Arguments.of("key%1$d", "%1$dvalue\n\0%1$d"), Arguments.of("key\0%1$d", "%1$dvalue\r%1$d") ); } private void populateSstFile(File file, TreeMap<Pair<String, Integer>, String> keys) throws RocksDBException { try (ManagedEnvOptions envOptions = new ManagedEnvOptions(); ManagedOptions managedOptions = new ManagedOptions(); ManagedSstFileWriter sstFileWriter = new ManagedSstFileWriter( envOptions, managedOptions)) { sstFileWriter.open(file.getAbsolutePath()); for (Map.Entry<Pair<String, Integer>, String> entry : keys.entrySet()) { if (entry.getKey().getValue() == 0) { sstFileWriter.delete(entry.getKey().getKey() .getBytes(StandardCharsets.UTF_8)); } else { sstFileWriter.put(entry.getKey().getKey() .getBytes(StandardCharsets.UTF_8), entry.getValue().getBytes(StandardCharsets.UTF_8)); } } sstFileWriter.finish(); } } @Native("Managed Rocks Tools") @ParameterizedTest @MethodSource("sstDumpToolScenarios") public void testSSTDumpIteratorWithKeyFormat(String keyFormat, String valueFormat) throws IOException, NativeLibraryNotLoadedException, RocksDBException { TreeMap<Pair<String, Integer>, String> keys = IntStream.range(0, 100) .boxed() .collect( Collectors.toMap( i -> Pair.of(String.format(keyFormat, i), i % 2), i -> i % 2 == 0 ? 
"" : String.format(valueFormat, i), (v1, v2) -> v2, TreeMap::new) ); File file = File.createTempFile("tmp_sst_file", ".sst"); file.deleteOnExit(); populateSstFile(file, keys); ExecutorService executorService = new ThreadPoolExecutor(1, 1, 0, SECONDS, new ArrayBlockingQueue<>(1), new CallerRunsPolicy()); ManagedSSTDumpTool tool = new ManagedSSTDumpTool(executorService, 8192); ManagedSSTDumpIterator<ManagedSSTDumpIterator.KeyValue> iterator = null; try { iterator = new ManagedSSTDumpIterator<ManagedSSTDumpIterator.KeyValue>( tool, file.getAbsolutePath(), new ManagedOptions() ) { @Override protected KeyValue getTransformedValue( Optional<KeyValue> value) { return value.orElse(null); } }; while (iterator.hasNext()) { ManagedSSTDumpIterator.KeyValue r = iterator.next(); Pair<String, Integer> recordKey = Pair.of(new String(r.getKey(), StandardCharsets.UTF_8), r.getType()); Assertions.assertTrue(keys.containsKey(recordKey)); Assertions.assertEquals( Optional.ofNullable(keys.get(recordKey)).orElse(""), new String(r.getValue(), StandardCharsets.UTF_8)); keys.remove(recordKey); } Assertions.assertEquals(0, keys.size()); } finally { IOUtils.closeQuietly(iterator); executorService.shutdown(); } } ``` ########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -86,51 +79,47 @@ public ManagedSSTDumpIterator(ManagedSSTDumpTool sstDumpTool, * @return Optional of the integer empty if no integer exists */ private Optional<Integer> getNextNumberInStream() throws IOException { - StringBuilder value = new StringBuilder(); - int val; - while ((val = processOutput.read()) != -1) { - if (val >= '0' && val <= '9') { - value.append((char) val); - } else if (value.length() > 0) { - break; - } + int n = processOutput.read(intBuffer, 0, 4); + if (n == 4) { + return Optional.of(ByteBuffer.wrap(intBuffer).getInt()); } - return value.length() > 0 ? 
Optional.of(Integer.valueOf(value.toString())) - : Optional.empty(); + return Optional.empty(); } - /** - * Reads the next n chars from the stream & makes a string. - * - * @param numberOfChars - * @return String of next chars read - * @throws IOException - */ - private String readNextNumberOfCharsFromStream(int numberOfChars) - throws IOException { - StringBuilder value = new StringBuilder(); - while (numberOfChars > 0) { - int noOfCharsRead = processOutput.read(charBuffer, 0, - Math.min(numberOfChars, charBuffer.length)); - if (noOfCharsRead == -1) { - break; - } - value.append(charBuffer, 0, noOfCharsRead); - numberOfChars -= noOfCharsRead; + private Optional<byte[]> getNextByteArray() throws IOException { + Optional<Integer> size = getNextNumberInStream(); + if (size.isPresent()) { + byte[] b = new byte[size.get()]; + int n = processOutput.read(b); + return n != size.get() ? Optional.empty() : Optional.of(b); } + return Optional.empty(); + } - return value.toString(); + private Optional<UnsignedLong> getNextUnsignedLong() { + long val = 0; + for (int i = 0; i < 8; i++) { + val = val << 8; + int nextByte = processOutput.read(); + if (nextByte < 0) { + return Optional.empty(); + } + val += nextByte; + } + return Optional.of(UnsignedLong.fromLongBits(val)); } private void init(ManagedSSTDumpTool sstDumpTool, File sstFile, ManagedOptions options) throws NativeLibraryNotLoadedException { - String[] args = {"--file=" + sstFile.getAbsolutePath(), "--command=scan"}; + String[] args = {"--file=" + sstFile.getAbsolutePath(), "--command=scan", Review Comment: ```suggestion String[] args = {"--file=" + sstFile.getAbsolutePath(), "--command=scan", "--silent"}; ``` ########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -180,33 +169,25 @@ public boolean hasNext() { public T next() { checkSanityOfProcess(); currentKey = nextKey; - nextKey = null; - boolean keyFound = false; - while (!keyFound) { - 
try { - Optional<Integer> keyLength = getNextNumberInStream(); - if (!keyLength.isPresent()) { - return getTransformedValue(currentKey); - } - String keyStr = readNextNumberOfCharsFromStream(keyLength.get()); - Matcher matcher = PATTERN_MATCHER.matcher(keyStr); - if (keyStr.length() == keyLength.get() && matcher.find()) { - Optional<Integer> valueLength = getNextNumberInStream(); - if (valueLength.isPresent()) { - String valueStr = readNextNumberOfCharsFromStream( - valueLength.get()); - if (valueStr.length() == valueLength.get()) { - keyFound = true; - nextKey = new KeyValue(matcher.group(PATTERN_KEY_GROUP_NUMBER), - matcher.group(PATTERN_SEQ_GROUP_NUMBER), - matcher.group(PATTERN_TYPE_GROUP_NUMBER), - valueStr); + nextKey = Optional.empty(); + try { Review Comment: nit: next() has too much nesting and could be simplified to this. ```suggestion try { Optional<byte[]> key = getNextByteArray(); if (!key.isPresent()) { return getTransformedValue(currentKey); } Optional<UnsignedLong> sequenceNumber = getNextUnsignedLong(); if (!sequenceNumber.isPresent()) { return getTransformedValue(currentKey); } Optional<Integer> type = getNextNumberInStream(); if (!type.isPresent()) { return getTransformedValue(currentKey); } Optional<byte[]> value = getNextByteArray(); value.ifPresent(bytes -> nextKey = Optional.of( new KeyValue(key.get(), sequenceNumber.get(), type.get(), bytes))); return getTransformedValue(currentKey); } catch (IOException e) { throw new RuntimeIOException(e); } ``` ########## hadoop-hdds/rocks-native/src/test/java/org/apache/hadoop/hdds/utils/db/managed/TestManagedSSTDumpIterator.java: ########## @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.utils.db.managed; + +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** + * Test for ManagedSSTDumpIterator. 
+ */ +public class TestManagedSSTDumpIterator { + + private void testSSTDumpIteratorWithKeys( + TreeMap<Pair<String, Integer>, String> keys) throws Exception { + File file = File.createTempFile("tmp_sst_file", ".sst"); + file.deleteOnExit(); + try (ManagedEnvOptions envOptions = new ManagedEnvOptions(); + ManagedOptions managedOptions = new ManagedOptions(); + ManagedSstFileWriter sstFileWriter = new ManagedSstFileWriter( + envOptions, managedOptions)) { + sstFileWriter.open(file.getAbsolutePath()); + for (Map.Entry<Pair<String, Integer>, String> entry : keys.entrySet()) { + if (entry.getKey().getValue() == 0) { + sstFileWriter.delete(entry.getKey().getKey() + .getBytes(StandardCharsets.UTF_8)); + } else { + sstFileWriter.put(entry.getKey().getKey() + .getBytes(StandardCharsets.UTF_8), + entry.getValue().getBytes(StandardCharsets.UTF_8)); + } + } + sstFileWriter.finish(); + sstFileWriter.close(); + ExecutorService executorService = + new ThreadPoolExecutor(1, 1, 0, TimeUnit.SECONDS, + new ArrayBlockingQueue<>(1), + new ThreadPoolExecutor.CallerRunsPolicy()); + ManagedSSTDumpTool tool = new ManagedSSTDumpTool(executorService, 8192); + try (ManagedSSTDumpIterator<ManagedSSTDumpIterator.KeyValue> iterator = + new ManagedSSTDumpIterator<ManagedSSTDumpIterator.KeyValue>( + tool, file.getAbsolutePath(), new ManagedOptions()) { + + @Override + protected KeyValue getTransformedValue(Optional<KeyValue> value) { + return value.orElse(null); + } + }) { + while (iterator.hasNext()) { + ManagedSSTDumpIterator.KeyValue r = iterator.next(); + Pair<String, Integer> recordKey = Pair.of(new String(r.getKey(), + StandardCharsets.UTF_8), r.getType()); + Assertions.assertTrue(keys.containsKey(recordKey)); + Assertions.assertEquals( + Optional.ofNullable(keys.get(recordKey)).orElse(""), + new String(r.getValue(), StandardCharsets.UTF_8)); + keys.remove(recordKey); + } + Assertions.assertEquals(0, keys.size()); + } + executorService.shutdown(); + } + } + + @Native("Managed Rocks Tools") 
+ @ParameterizedTest + @ArgumentsSource(KeyValueFormatArgumentProvider.class) + public void testSSTDumpIteratorWithKeyFormat(String keyFormat, + String valueFormat) throws Exception { + TreeMap<Pair<String, Integer>, String> keys = + IntStream.range(0, 100).boxed().collect( + Collectors.toMap( + i -> Pair.of(String.format(keyFormat, i), i % 2), + i -> i % 2 == 0 ? "" : String.format(valueFormat, i), + (v1, v2) -> v2, + TreeMap::new)); + testSSTDumpIteratorWithKeys(keys); + } +} + +class KeyValueFormatArgumentProvider implements ArgumentsProvider { + @Override + public Stream<? extends Arguments> provideArguments( + ExtensionContext context) { + return Stream.of( + Arguments.of("'key%1$d=>", "%1$dvalue'"), Review Comment: nit: You can also add a description of the test similar to [this](https://github.com/apache/ozone/blob/master/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestLDBCli.java#L119) or [this](https://github.com/apache/ozone/blob/3858cd102bbc3c328c24261d929c6549f5358a40/hadoop-hdds/rocksdb-checkpoint-differ/src/test/java/org/apache/ozone/rocksdiff/TestRocksDBCheckpointDiffer.java#L195) so that the reader will know what you are testing from the description. ########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/NativeLibraryLoader.java: ########## @@ -132,15 +134,17 @@ private Optional<File> copyResourceFromJarToTemp(final String libraryName) } // create a temporary file to copy the library to - final File temp = File.createTempFile(libraryName, getLibOsSuffix()); + final File temp = File.createTempFile(libraryName, getLibOsSuffix(), + new File("")); Review Comment: Should this be the current dir or some root dir instead of `""`? I don't know whether an empty string means the current dir. 
########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -244,40 +221,45 @@ protected void finalize() throws Throwable { * Class containing Parsed KeyValue Record from Sst Dumptool output. */ public static final class KeyValue { - private String key; - private Integer sequence; + private byte[] key; + private UnsignedLong sequence; private Integer type; + private byte[] value; - private String value; - - private KeyValue(String key, String sequence, String type, - String value) { + private KeyValue(byte[] key, UnsignedLong sequence, Integer type, + byte[] value) { this.key = key; - this.sequence = Integer.valueOf(sequence); - this.type = Integer.valueOf(type); + this.sequence = sequence; + this.type = type; this.value = value; } - public String getKey() { + @SuppressFBWarnings("EI_EXPOSE_REP") + public byte[] getKey() { return key; } - public Integer getSequence() { + public UnsignedLong getSequence() { return sequence; } public Integer getType() { return type; } - public String getValue() { + @SuppressFBWarnings("EI_EXPOSE_REP") + public byte[] getValue() { return value; } @Override public String toString() { - return "KeyValue{" + "key='" + key + '\'' + ", sequence=" + sequence + - ", type=" + type + ", value='" + value + '\'' + '}'; + return "KeyValue{" + + "key=" + Arrays.toString(key) + + ", sequence=" + sequence + + ", type=" + type + + ", value=" + Arrays.toString(value) + Review Comment: Same as above. 
########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -180,33 +169,25 @@ public boolean hasNext() { public T next() { checkSanityOfProcess(); currentKey = nextKey; - nextKey = null; - boolean keyFound = false; - while (!keyFound) { - try { - Optional<Integer> keyLength = getNextNumberInStream(); - if (!keyLength.isPresent()) { - return getTransformedValue(currentKey); - } - String keyStr = readNextNumberOfCharsFromStream(keyLength.get()); - Matcher matcher = PATTERN_MATCHER.matcher(keyStr); - if (keyStr.length() == keyLength.get() && matcher.find()) { - Optional<Integer> valueLength = getNextNumberInStream(); - if (valueLength.isPresent()) { - String valueStr = readNextNumberOfCharsFromStream( - valueLength.get()); - if (valueStr.length() == valueLength.get()) { - keyFound = true; - nextKey = new KeyValue(matcher.group(PATTERN_KEY_GROUP_NUMBER), - matcher.group(PATTERN_SEQ_GROUP_NUMBER), - matcher.group(PATTERN_TYPE_GROUP_NUMBER), - valueStr); + nextKey = Optional.empty(); + try { Review Comment: nit: next() has too much nesting and could be simplified to this. 
```suggestion try { Optional<byte[]> key = getNextByteArray(); if (!key.isPresent()) { return getTransformedValue(currentKey); } Optional<UnsignedLong> sequenceNumber = getNextUnsignedLong(); if (!sequenceNumber.isPresent()) { return getTransformedValue(currentKey); } Optional<Integer> type = getNextNumberInStream(); if (!type.isPresent()) { return getTransformedValue(currentKey); } Optional<byte[]> value = getNextByteArray(); value.ifPresent(bytes -> nextKey = Optional.of( new KeyValue(key.get(), sequenceNumber.get(), type.get(), bytes))); return getTransformedValue(currentKey); } catch (IOException e) { throw new RuntimeIOException(e); } ``` ########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -86,51 +79,47 @@ public ManagedSSTDumpIterator(ManagedSSTDumpTool sstDumpTool, * @return Optional of the integer empty if no integer exists */ private Optional<Integer> getNextNumberInStream() throws IOException { - StringBuilder value = new StringBuilder(); - int val; - while ((val = processOutput.read()) != -1) { - if (val >= '0' && val <= '9') { - value.append((char) val); - } else if (value.length() > 0) { - break; - } + int n = processOutput.read(intBuffer, 0, 4); + if (n == 4) { Review Comment: Is it considered data corruption if the number of bytes read is between 1 and 3? ########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/PipeInputStream.java: ########## @@ -65,11 +65,13 @@ public int read() { this.close(); return -1; } - if (numberOfBytesLeftToRead == 0) { + while (numberOfBytesLeftToRead == 0) { numberOfBytesLeftToRead = readInternal(byteBuffer, capacity, nativeHandle); index = 0; - return read(); + if (numberOfBytesLeftToRead != 0) { Review Comment: Will `readInternal()` return -1 when there are no more bytes to read? 
########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -244,40 +221,45 @@ protected void finalize() throws Throwable { * Class containing Parsed KeyValue Record from Sst Dumptool output. */ public static final class KeyValue { - private String key; - private Integer sequence; + private byte[] key; + private UnsignedLong sequence; private Integer type; + private byte[] value; Review Comment: Make all fields final. ########## hadoop-hdds/rocks-native/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSSTDumpIterator.java: ########## @@ -244,40 +221,45 @@ protected void finalize() throws Throwable { * Class containing Parsed KeyValue Record from Sst Dumptool output. */ public static final class KeyValue { - private String key; - private Integer sequence; + private byte[] key; + private UnsignedLong sequence; private Integer type; + private byte[] value; - private String value; - - private KeyValue(String key, String sequence, String type, - String value) { + private KeyValue(byte[] key, UnsignedLong sequence, Integer type, + byte[] value) { this.key = key; - this.sequence = Integer.valueOf(sequence); - this.type = Integer.valueOf(type); + this.sequence = sequence; + this.type = type; this.value = value; } - public String getKey() { + @SuppressFBWarnings("EI_EXPOSE_REP") + public byte[] getKey() { return key; } - public Integer getSequence() { + public UnsignedLong getSequence() { return sequence; } public Integer getType() { return type; } - public String getValue() { + @SuppressFBWarnings("EI_EXPOSE_REP") + public byte[] getValue() { return value; } @Override public String toString() { - return "KeyValue{" + "key='" + key + '\'' + ", sequence=" + sequence + - ", type=" + type + ", value='" + value + '\'' + '}'; + return "KeyValue{" + + "key=" + Arrays.toString(key) + Review Comment: I think `StringUtils.bytes2String(key)` should be used to make it human readable. 
```suggestion "key=" + StringUtils.bytes2String(key) + ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
