HeartSaVioR commented on a change in pull request #28412:
URL: https://github.com/apache/spark/pull/28412#discussion_r437846179



##########
File path: core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala
##########
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.deploy.history
+
+import java.io.IOException
+import java.util.Collection
+import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent.atomic.AtomicBoolean
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.util.kvstore._
+
+/**
+ * An implementation of KVStore that accelerates event logs loading.
+ *
+ * When rebuilding the application state from event logs, HybridStore will
+ * write data to InMemoryStore at first and use a background thread to dump
+ * data to LevelDB once the app store is restored. We don't expect write
+ * operations (except for caching) after switchToLevelDB() has been called.
+ */
+
+private[history] class HybridStore extends KVStore {
+
+  private val inMemoryStore = new InMemoryStore()
+
+  private var levelDB: LevelDB = null
+
+  // Flag to indicate whether we should use inMemoryStore or levelDB
+  private val shouldUseInMemoryStore = new AtomicBoolean(true)
+
+  // Flag to indicate whether this hybrid store is closed, use this flag
+  // to avoid starting background thread after the store is closed
+  private val closed = new AtomicBoolean(false)
+
+  // A background thread that dumps data from inMemoryStore to levelDB
+  private var backgroundThread: Thread = null
+
+  // A hash map that stores all classes that have been written to inMemoryStore
+  private val klassMap = new ConcurrentHashMap[Class[_], Boolean]
+
+  /** Fetches the metadata object of type `klass` from the store returned by getStore(). */
+  override def getMetadata[T](klass: Class[T]): T = {
+    val activeStore = getStore()
+    activeStore.getMetadata(klass)
+  }
+
+  /** Records `value` as the metadata of the store returned by getStore(). */
+  override def setMetadata(value: Object): Unit = {
+    val activeStore = getStore()
+    activeStore.setMetadata(value)
+  }
+
+  /** Reads the `klass` instance identified by `naturalKey` from the store returned by getStore(). */
+  override def read[T](klass: Class[T], naturalKey: Object): T = {
+    val activeStore = getStore()
+    activeStore.read(klass, naturalKey)
+  }
+
+  /**
+   * Writes `value` to the store returned by getStore() and, as long as no
+   * background dump thread has been created, remembers the value's class in
+   * klassMap.
+   */
+  override def write(value: Object): Unit = {
+    val activeStore = getStore()
+    activeStore.write(value)
+
+    // Classes first seen after the background thread exists are not tracked,
+    // so they won't be dumped.
+    val dumpNotStarted = backgroundThread == null
+    if (dumpNotStarted) {
+      klassMap.putIfAbsent(value.getClass(), true)
+    }
+  }
+
+  /**
+   * Deletes the `klass` entry identified by `naturalKey` from the store
+   * returned by getStore().
+   *
+   * Throws IllegalStateException if the background thread has already been
+   * created, i.e. the switch to levelDB has begun.
+   */
+  override def delete(klass: Class[_], naturalKey: Object): Unit = {
+    if (backgroundThread == null) {
+      getStore().delete(klass, naturalKey)
+    } else {
+      throw new IllegalStateException("delete() shouldn't be called after " +
+        "the hybrid store begins switching to levelDB")
+    }
+  }
+
+  /** Returns a view over the `klass` entries of the store returned by getStore(). */
+  override def view[T](klass: Class[T]): KVStoreView[T] = {
+    val activeStore = getStore()
+    activeStore.view(klass)
+  }
+
+  /** Counts the `klass` entries held by the store returned by getStore(). */
+  override def count(klass: Class[_]): Long = {
+    val activeStore = getStore()
+    activeStore.count(klass)
+  }
+
+  /**
+   * Counts the `klass` entries whose `index` equals `indexedValue`, as
+   * reported by the store returned by getStore().
+   */
+  override def count(
+      klass: Class[_],
+      index: String,
+      indexedValue: Object): Long = {
+    val activeStore = getStore()
+    activeStore.count(klass, index, indexedValue)
+  }
+
+  /**
+   * Closes this hybrid store.
+   *
+   * The `closed` flag is set first so that a later switchToLevelDB() call
+   * returns without starting a background thread. If a background thread is
+   * still alive, this method blocks until it finishes. Both underlying stores
+   * are then closed; the nested try/finally blocks guarantee that
+   * inMemoryStore is closed even if waiting for the thread or closing levelDB
+   * throws (for example when the join is interrupted).
+   *
+   * Any exception thrown while joining or closing levelDB still propagates to
+   * the caller after the remaining cleanup has run.
+   */
+  override def close(): Unit = {
+    closed.set(true)
+
+    try {
+      // Read the field once so the null check and the join see the same thread.
+      val dumpThread = backgroundThread
+      if (dumpThread != null && dumpThread.isAlive()) {
+        // The background thread is still running, wait for it to finish
+        dumpThread.join()
+      }
+    } finally {
+      try {
+        if (levelDB != null) {
+          levelDB.close()
+        }
+      } finally {
+        inMemoryStore.close()
+      }
+    }
+  }
+
+  /**
+   * Removes the `klass` entries whose `index` matches any of `indexValues`
+   * from the store returned by getStore(), returning that store's result.
+   *
+   * Throws IllegalStateException if the background thread has already been
+   * created, i.e. the switch to levelDB has begun.
+   */
+  override def removeAllByIndexValues[T](
+      klass: Class[T],
+      index: String,
+      indexValues: Collection[_]): Boolean = {
+    if (backgroundThread == null) {
+      getStore().removeAllByIndexValues(klass, index, indexValues)
+    } else {
+      throw new IllegalStateException("removeAllByIndexValues() shouldn't be " +
+        "called after the hybrid store begins switching to levelDB")
+    }
+  }
+
+  /**
+   * Registers the LevelDB instance that close() will close and that the
+   * background thread created by switchToLevelDB() writes into.
+   */
+  def setLevelDB(levelDB: LevelDB): Unit = { this.levelDB = levelDB }
+
+  /**
+   * This method is called when the writing is done for inMemoryStore. A
+   * background thread will be created and started to dump the data in
+   * inMemoryStore to levelDB. Once the dumping is completed, the underlying
+   * kvstore will be switched to levelDB.
+   */
+  def switchToLevelDB(listener: HybridStore.SwitchToLevelDBListener): Unit = {
+    if (closed.get) {
+      return
+    }
+
+    backgroundThread = new Thread(() => {
+      try {
+        for (klass <- klassMap.keys().asScala) {
+          val it = inMemoryStore.view(klass).closeableIterator()
+          while (it.hasNext()) {
+            levelDB.write(it.next())
+          }
+        }
+        listener.onSwitchToLevelDBSuccess()
+        shouldUseInMemoryStore.set(false)
+        inMemoryStore.close()
+      } catch {
+        case e: Exception =>
+          listener.onSwitchToLevelDBFail(e)
+      }
+    })
+    backgroundThread.setDaemon(true)
+    backgroundThread.setName("hybridstore-switch-to-leveldb")

Review comment:
       It would probably be easier to track these threads if we added a unique 
ID, such as application ID + attempt ID, to the thread name. To keep the name 
short, I think keeping only `hybridstore-` from the current string is enough, 
as there's only one kind of thread here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to