BelodengKlaus commented on a change in pull request #34913:
URL: https://github.com/apache/spark/pull/34913#discussion_r771180938



##########
File path: common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
##########
@@ -0,0 +1,436 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util.kvstore;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.ref.Reference;
+import java.lang.ref.WeakReference;
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import org.rocksdb.BlockBasedTableConfig;
+import org.rocksdb.Options;
+import org.rocksdb.Statistics;
+import org.rocksdb.WriteBatch;
+import org.rocksdb.WriteOptions;
+
+import org.apache.spark.annotation.Private;
+
+/**
+ * Implementation of KVStore that uses RocksDB as the underlying data store.
+ */
+@Private
+public class RocksDB implements KVStore {
+
+  static {
+    org.rocksdb.RocksDB.loadLibrary();
+  }
+
+  @VisibleForTesting
+  static final long STORE_VERSION = 1L;
+
+  @VisibleForTesting
+  static final byte[] STORE_VERSION_KEY = "__version__".getBytes(UTF_8);
+
+  /** DB key where app metadata is stored. */
+  private static final byte[] METADATA_KEY = "__meta__".getBytes(UTF_8);
+
+  /** DB key where type aliases are stored. */
+  private static final byte[] TYPE_ALIASES_KEY = "__types__".getBytes(UTF_8);
+
+  private static final BlockBasedTableConfig tableFormatConfig = new BlockBasedTableConfig()
+    .setFormatVersion(5);
+
+  private static final Options dbOptions = new Options()
+    .setCreateIfMissing(true)
+    .setTableFormatConfig(tableFormatConfig)
+    .setStatistics(new Statistics());
+
+  private static final WriteOptions writeOptions = new WriteOptions().setSync(true);
+
+  private final AtomicReference<org.rocksdb.RocksDB> _db;
+
+  final KVStoreSerializer serializer;
+
+  /**
+   * Keep a mapping of class names to a shorter, unique ID managed by the store. This serves two
+   * purposes: make the keys stored on disk shorter, and spread out the keys, since class names
+   * will often have a long, redundant prefix (think "org.apache.spark.").
+   */
+  private final ConcurrentMap<String, byte[]> typeAliases;
+  private final ConcurrentMap<Class<?>, RocksDBTypeInfo> types;
+
+  /**
+   * Trying to close a JNI RocksDB handle with a closed DB causes JVM crashes. This is used to
+   * ensure that all iterators are correctly closed before RocksDB is closed. Use weak references
+   * to ensure that the iterator can be GCed when it is only referenced here.
+   */
+  private final ConcurrentLinkedQueue<Reference<RocksDBIterator<?>>> iteratorTracker;
+
+  public RocksDB(File path) throws Exception {
+    this(path, new KVStoreSerializer());
+  }
+
+  public RocksDB(File path, KVStoreSerializer serializer) throws Exception {
+    this.serializer = serializer;
+    this.types = new ConcurrentHashMap<>();
+    this._db = new AtomicReference<>(org.rocksdb.RocksDB.open(dbOptions, path.toString()));
+
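+    // Check the on-disk layout version; a store written with a different version
+    // of the format cannot be read, so close and fail fast in that case.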
+    byte[] versionData = db().get(STORE_VERSION_KEY);
+    if (versionData != null) {
+      long version = serializer.deserializeLong(versionData);
+      if (version != STORE_VERSION) {
+        close();
+        throw new UnsupportedStoreVersionException();
+      }
+    } else {
+      db().put(STORE_VERSION_KEY, serializer.serialize(STORE_VERSION));
+    }
+
+    Map<String, byte[]> aliases;
+    try {
+      aliases = get(TYPE_ALIASES_KEY, TypeAliases.class).aliases;
+    } catch (NoSuchElementException e) {
+      aliases = new HashMap<>();
+    }
+    typeAliases = new ConcurrentHashMap<>(aliases);
+
+    iteratorTracker = new ConcurrentLinkedQueue<>();
+  }
+
+  @Override
+  public <T> T getMetadata(Class<T> klass) throws Exception {
+    try {
+      return get(METADATA_KEY, klass);
+    } catch (NoSuchElementException nsee) {
+      return null;
+    }
+  }
+
+  @Override
+  public void setMetadata(Object value) throws Exception {
+    if (value != null) {
+      put(METADATA_KEY, value);
+    } else {
+      db().delete(METADATA_KEY);
+    }
+  }
+
+  <T> T get(byte[] key, Class<T> klass) throws Exception {
+    byte[] data = db().get(key);
+    if (data == null) {
+      throw new NoSuchElementException(new String(key, UTF_8));
+    }
+    return serializer.deserialize(data, klass);
+  }
+
+  private void put(byte[] key, Object value) throws Exception {
+    Preconditions.checkArgument(value != null, "Null values are not allowed.");
+    db().put(key, serializer.serialize(value));
+  }
+
+  @Override
+  public <T> T read(Class<T> klass, Object naturalKey) throws Exception {
+    Preconditions.checkArgument(naturalKey != null, "Null keys are not allowed.");
+    byte[] key = getTypeInfo(klass).naturalIndex().start(null, naturalKey);
+    return get(key, klass);
+  }
+
+  @Override
+  public void write(Object value) throws Exception {
+    Preconditions.checkArgument(value != null, "Null values are not allowed.");
+    RocksDBTypeInfo ti = getTypeInfo(value.getClass());
+    byte[] data = serializer.serialize(value);
+    synchronized (ti) {
+      try (WriteBatch writeBatch = new WriteBatch()) {
+        updateBatch(writeBatch, value, data, value.getClass(), ti.naturalIndex(), ti.indices());
+        db().write(writeOptions, writeBatch);
+      }
+    }
+  }
+
+  public void writeAll(List<?> values) throws Exception {
+    Preconditions.checkArgument(values != null && !values.isEmpty(),
+      "Non-empty values required.");
+
+    // Group values by class, in case the list contains values of different classes;
+    // the typical use case is a single class.
+    // A NullPointerException will be thrown if values contains a null element.
+    for (Map.Entry<? extends Class<?>, ? extends List<?>> entry :
+        values.stream().collect(Collectors.groupingBy(Object::getClass)).entrySet()) {
+
+      final Iterator<?> valueIter = entry.getValue().iterator();
+      final Iterator<byte[]> serializedValueIter;
+
+      // Serialize outside the synchronized block to keep the lock hold time short.
+      List<byte[]> list = new ArrayList<>(entry.getValue().size());
+      for (Object value : entry.getValue()) {
+        list.add(serializer.serialize(value));
+      }
+      serializedValueIter = list.iterator();
+
+      final Class<?> klass = entry.getKey();
+      final RocksDBTypeInfo ti = getTypeInfo(klass);
+
+      synchronized (ti) {
+        final RocksDBTypeInfo.Index naturalIndex = ti.naturalIndex();
+        final Collection<RocksDBTypeInfo.Index> indices = ti.indices();
+
+        try (WriteBatch writeBatch = new WriteBatch()) {
+          while (valueIter.hasNext()) {
+            updateBatch(writeBatch, valueIter.next(), 
serializedValueIter.next(), klass,
+                    naturalIndex, indices);

Review comment:
       nit: fix the indentation of this continuation line.
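
       For reference, here is a sketch of the expected formatting, assuming the
       two-space continuation indent used elsewhere in this method (e.g. the
       Preconditions.checkArgument call above):

       ```java
       updateBatch(writeBatch, valueIter.next(), serializedValueIter.next(), klass,
         naturalIndex, indices);
       ```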




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


