Galsza commented on code in PR #4424:
URL: https://github.com/apache/ozone/pull/4424#discussion_r1143373448


##########
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBBatchOperation.java:
##########
@@ -21,15 +21,193 @@
 import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteBatch;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions;
+import org.apache.ratis.util.StringUtils;
+import org.apache.ratis.util.TraditionalBinaryPrefix;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
 
 /**
  * Batch operation implementation for rocks db.
  */
 public class RDBBatchOperation implements BatchOperation {
+  static final Logger LOG = LoggerFactory.getLogger(RDBBatchOperation.class);
 
+  static void debug(Supplier<String> message) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("\n{}", message.get());
+    }
+  }
+
+  static String byteSize2String(int length) {
+    return TraditionalBinaryPrefix.long2String(length, "B", 3);
+  }
+
+  static String countSize2String(int count, int size) {
+    return count + " (" + byteSize2String(size) + ")";
+  }
+
+  /**
+   * To implement {@link #equals(Object)} and {@link #hashCode()}
+   * based on the contents of {@link #bytes}.
+   * <p>
+   * Note that it is incorrect to use {@link #bytes#equals(Object)}
+   * and {@link #bytes#hashCode()} here since
+   * they do not use the contents of {@link #bytes} in the computations.
+   * These methods simply inherit from {@link Object}.
+   */
+  static final class ByteArray {
+    private final byte[] bytes;
+
+    ByteArray(byte[] bytes) {
+      this.bytes = bytes;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      } else if (obj == null || getClass() != obj.getClass()) {
+        return false;
+      }
+      final ByteArray that = (ByteArray) obj;
+      return Arrays.equals(this.bytes, that.bytes);
+    }
+
+    @Override
+    public int hashCode() {
+      return Arrays.hashCode(bytes);
+    }
+  }
+
+  class PutOpCache {
+    class FamilyCache {
+      private final ColumnFamily family;
+      /** A (key -> value) map. */
+      private final Map<ByteArray, byte[]> putOps = new HashMap<>();
+
+      private int batchSize;
+      private int discardedSize;
+      private int discardedCount;
+      private int putCount;
+      private int delCount;
+
+      FamilyCache(ColumnFamily family) {
+        this.family = family;
+      }
+
+      /** Batch put the entire family cache. */
+      void batchPut() throws IOException {
+        for (Map.Entry<ByteArray, byte[]> op : putOps.entrySet()) {
+          family.batchPut(writeBatch, op.getKey().bytes, op.getValue());
+        }

Review Comment:
   If I understand correctly, the batchSize, putCount etc. values should be 
reset once the batchPut operation is actually invoked. Should it be done here?



##########
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBBatchOperation.java:
##########
@@ -21,15 +21,193 @@
 import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteBatch;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions;
+import org.apache.ratis.util.StringUtils;
+import org.apache.ratis.util.TraditionalBinaryPrefix;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
 
 /**
  * Batch operation implementation for rocks db.
  */
 public class RDBBatchOperation implements BatchOperation {
+  static final Logger LOG = LoggerFactory.getLogger(RDBBatchOperation.class);
 
+  static void debug(Supplier<String> message) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("\n{}", message.get());
+    }
+  }
+
+  static String byteSize2String(int length) {
+    return TraditionalBinaryPrefix.long2String(length, "B", 3);
+  }
+
+  static String countSize2String(int count, int size) {
+    return count + " (" + byteSize2String(size) + ")";
+  }
+
+  /**
+   * To implement {@link #equals(Object)} and {@link #hashCode()}
+   * based on the contents of {@link #bytes}.
+   * <p>
+   * Note that it is incorrect to use {@link #bytes#equals(Object)}
+   * and {@link #bytes#hashCode()} here since
+   * they do not use the contents of {@link #bytes} in the computations.
+   * These methods simply inherit from {@link Object}.
+   */
+  static final class ByteArray {
+    private final byte[] bytes;
+
+    ByteArray(byte[] bytes) {
+      this.bytes = bytes;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      } else if (obj == null || getClass() != obj.getClass()) {
+        return false;
+      }
+      final ByteArray that = (ByteArray) obj;
+      return Arrays.equals(this.bytes, that.bytes);
+    }
+
+    @Override
+    public int hashCode() {
+      return Arrays.hashCode(bytes);
+    }
+  }
+
+  class PutOpCache {
+    class FamilyCache {
+      private final ColumnFamily family;
+      /** A (key -> value) map. */
+      private final Map<ByteArray, byte[]> putOps = new HashMap<>();
+
+      private int batchSize;
+      private int discardedSize;
+      private int discardedCount;
+      private int putCount;
+      private int delCount;
+
+      FamilyCache(ColumnFamily family) {
+        this.family = family;
+      }
+
+      /** Batch put the entire family cache. */
+      void batchPut() throws IOException {
+        for (Map.Entry<ByteArray, byte[]> op : putOps.entrySet()) {
+          family.batchPut(writeBatch, op.getKey().bytes, op.getValue());
+        }
+        putOps.clear();
+      }
+
+      void put(byte[] key, byte[] value) {
+        batchSize += key.length + value.length;

Review Comment:
   If batchSize is not reset anywhere this is going to overflow. Also, could 
this overflow even with a reset included? (Will cached put operations ever 
reach 2 gigs in size?)



##########
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBBatchOperation.java:
##########
@@ -21,15 +21,193 @@
 import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteBatch;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions;
+import org.apache.ratis.util.StringUtils;
+import org.apache.ratis.util.TraditionalBinaryPrefix;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
 
 /**
  * Batch operation implementation for rocks db.
  */
 public class RDBBatchOperation implements BatchOperation {
+  static final Logger LOG = LoggerFactory.getLogger(RDBBatchOperation.class);
 
+  static void debug(Supplier<String> message) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("\n{}", message.get());
+    }
+  }
+
+  static String byteSize2String(int length) {
+    return TraditionalBinaryPrefix.long2String(length, "B", 3);
+  }
+
+  static String countSize2String(int count, int size) {
+    return count + " (" + byteSize2String(size) + ")";
+  }
+
+  /**
+   * To implement {@link #equals(Object)} and {@link #hashCode()}
+   * based on the contents of {@link #bytes}.
+   * <p>
+   * Note that it is incorrect to use {@link #bytes#equals(Object)}
+   * and {@link #bytes#hashCode()} here since
+   * they do not use the contents of {@link #bytes} in the computations.
+   * These methods simply inherit from {@link Object}.
+   */
+  static final class ByteArray {
+    private final byte[] bytes;
+
+    ByteArray(byte[] bytes) {
+      this.bytes = bytes;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      } else if (obj == null || getClass() != obj.getClass()) {
+        return false;
+      }
+      final ByteArray that = (ByteArray) obj;
+      return Arrays.equals(this.bytes, that.bytes);
+    }
+
+    @Override
+    public int hashCode() {
+      return Arrays.hashCode(bytes);
+    }
+  }
+
+  class PutOpCache {
+    class FamilyCache {
+      private final ColumnFamily family;
+      /** A (key -> value) map. */

Review Comment:
   ```suggestion
         /** 
          * The map used to store the key and value byte array pairs.
          * <p>
          * {@link ByteArray} is used for the keys to avoid incorrect map 
          * operations.
          */
   ```



##########
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBBatchOperation.java:
##########
@@ -21,15 +21,193 @@
 import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteBatch;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions;
+import org.apache.ratis.util.StringUtils;
+import org.apache.ratis.util.TraditionalBinaryPrefix;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
 
 /**
  * Batch operation implementation for rocks db.
  */
 public class RDBBatchOperation implements BatchOperation {
+  static final Logger LOG = LoggerFactory.getLogger(RDBBatchOperation.class);
 
+  static void debug(Supplier<String> message) {

Review Comment:
   I think this method should be private. Are we going to use it anywhere else?



##########
hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBBatchOperation.java:
##########
@@ -21,15 +21,193 @@
 import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteBatch;
 import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions;
+import org.apache.ratis.util.StringUtils;
+import org.apache.ratis.util.TraditionalBinaryPrefix;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
 
 /**
  * Batch operation implementation for rocks db.
  */
 public class RDBBatchOperation implements BatchOperation {
+  static final Logger LOG = LoggerFactory.getLogger(RDBBatchOperation.class);
 
+  static void debug(Supplier<String> message) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("\n{}", message.get());
+    }
+  }
+
+  static String byteSize2String(int length) {
+    return TraditionalBinaryPrefix.long2String(length, "B", 3);
+  }
+
+  static String countSize2String(int count, int size) {
+    return count + " (" + byteSize2String(size) + ")";
+  }
+
+  /**
+   * To implement {@link #equals(Object)} and {@link #hashCode()}
+   * based on the contents of {@link #bytes}.
+   * <p>
+   * Note that it is incorrect to use {@link #bytes#equals(Object)}
+   * and {@link #bytes#hashCode()} here since
+   * they do not use the contents of {@link #bytes} in the computations.
+   * These methods simply inherit from {@link Object}.
+   */
+  static final class ByteArray {
+    private final byte[] bytes;
+
+    ByteArray(byte[] bytes) {
+      this.bytes = bytes;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      } else if (obj == null || getClass() != obj.getClass()) {
+        return false;
+      }
+      final ByteArray that = (ByteArray) obj;
+      return Arrays.equals(this.bytes, that.bytes);
+    }
+
+    @Override
+    public int hashCode() {
+      return Arrays.hashCode(bytes);
+    }
+  }
+
+  class PutOpCache {
+    class FamilyCache {
+      private final ColumnFamily family;
+      /** A (key -> value) map. */
+      private final Map<ByteArray, byte[]> putOps = new HashMap<>();
+
+      private int batchSize;
+      private int discardedSize;
+      private int discardedCount;
+      private int putCount;
+      private int delCount;
+
+      FamilyCache(ColumnFamily family) {
+        this.family = family;
+      }
+
+      /** Batch put the entire family cache. */
+      void batchPut() throws IOException {
+        for (Map.Entry<ByteArray, byte[]> op : putOps.entrySet()) {
+          family.batchPut(writeBatch, op.getKey().bytes, op.getValue());
+        }
+        putOps.clear();
+      }
+
+      void put(byte[] key, byte[] value) {
+        batchSize += key.length + value.length;
+        putCount++;
+        final byte[] previousValue = putOps.put(new ByteArray(key), value);
+
+        if (previousValue != null) {
+          discardedSize += key.length + previousValue.length;
+          discardedCount++;
+          debug(() -> String.format("Overwriting a previous put(value: %s)",
+              byteSize2String(previousValue.length)));
+        }
+
+        debug(() -> String.format(
+            "%s: %s put(key: %s, value: %s), newPut=%s, %s; key=%s",
+            name, family.getName(), byteSize2String(key.length),
+            byteSize2String(value.length),
+            putCount, getBatchSizeDiscardedString(),
+            StringUtils.bytes2HexString(key).toUpperCase()));
+      }
+
+      void remove(byte[] key) {
+        batchSize += key.length;
+        delCount++;
+        final byte[] removed = putOps.remove(new ByteArray(key));
+        if (removed != null) {
+          discardedSize += key.length + removed.length;
+          discardedCount++;
+          debug(() -> String.format("Removed a previous put(value: %s)",
+              byteSize2String(removed.length)));
+        }
+
+        debug(() -> String.format(
+            "%s: %s delete(key: %s), newDel=%s, %s; key=%s",
+            name, family.getName(), byteSize2String(key.length),
+            delCount, getBatchSizeDiscardedString(),
+            StringUtils.bytes2HexString(key).toUpperCase()));
+      }
+
+      String getBatchSizeDiscardedString() {
+        return String.format("batchSize=%s, discarded: %s",
+            byteSize2String(batchSize),
+            countSize2String(discardedCount, discardedSize));
+      }
+    }
+
+    /** A (family name -> {@link FamilyCache}) map. */
+    private final Map<String, FamilyCache> map = new HashMap<>();

Review Comment:
   I suggest renaming the variable to nameToCacheMap for easier understanding.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to