[spark] branch master updated: [SPARK-26603][K8S] Update minikube backend
This is an automated email from the ASF dual-hosted git repository. felixcheung pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 196ca0c [SPARK-26603][K8S] Update minikube backend 196ca0c is described below commit 196ca0c8f5f5caa2ae588ff4a63ce314aa42aecc Author: Stavros Kontopoulos AuthorDate: Sun Feb 3 17:15:20 2019 -0800 [SPARK-26603][K8S] Update minikube backend ## What changes were proposed in this pull request? - Covers the latest minikube versions. - Keeps support for the older versions. Note: While I was facing disk pressure issues locally on my machine, I noticed that the minikube status command would report that everything was working fine even if some kube-system pods were not up. I don't think the output is 100% reliable, but it is good enough for most cases. ## How was this patch tested? Ran it against the latest version of minikube (v0.32.0). Author: Stavros Kontopoulos Closes #23520 from skonto/update-mini-backend. 
--- .../backend/minikube/Minikube.scala| 64 ++ 1 file changed, 54 insertions(+), 10 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 6494cbc..58aa177 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube -import java.io.File import java.nio.file.Paths import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} @@ -26,8 +25,14 @@ import org.apache.spark.internal.Logging // TODO support windows private[spark] object Minikube extends Logging { - private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60 + private val HOST_PREFIX = "host:" + private val KUBELET_PREFIX = "kubelet:" + private val APISERVER_PREFIX = "apiserver:" + private val KUBECTL_PREFIX = "kubectl:" + private val MINIKUBE_VM_PREFIX = "minikubeVM: " + private val MINIKUBE_PREFIX = "minikube: " + private val MINIKUBE_PATH = ".minikube" def getMinikubeIp: String = { val outputs = executeMinikube("ip") @@ -38,12 +43,21 @@ private[spark] object Minikube extends Logging { def getMinikubeStatus: MinikubeStatus.Value = { val statusString = executeMinikube("status") - .filter(line => line.contains("minikubeVM: ") || line.contains("minikube:")) - .head - .replaceFirst("minikubeVM: ", "") - .replaceFirst("minikube: ", "") -MinikubeStatus.unapply(statusString) +logInfo(s"Minikube status command output:\n$statusString") +// up to minikube version v0.30.0 use this to check for minikube status +val oldMinikube = statusString + 
.filter(line => line.contains(MINIKUBE_VM_PREFIX) || line.contains(MINIKUBE_PREFIX)) + +if (oldMinikube.isEmpty) { + getIfNewMinikubeStatus(statusString) +} else { + val finalStatusString = oldMinikube +.head +.replaceFirst(MINIKUBE_VM_PREFIX, "") +.replaceFirst(MINIKUBE_PREFIX, "") + MinikubeStatus.unapply(finalStatusString) .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) +} } def getKubernetesClient: DefaultKubernetesClient = { @@ -52,13 +66,43 @@ private[spark] object Minikube extends Logging { val kubernetesConf = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(kubernetesMaster) - .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) - .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) - .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) + .withCaCertFile(Paths.get(userHome, MINIKUBE_PATH, "ca.crt").toFile.getAbsolutePath) + .withClientCertFile(Paths.get(userHome, MINIKUBE_PATH, "apiserver.crt").toFile.getAbsolutePath) + .withClientKeyFile(Paths.get(userHome, MINIKUBE_PATH, "apiserver.key").toFile.getAbsolutePath) .build() new DefaultKubernetesClient(kubernetesConf) } + // Covers minikube status output after Minikube V0.30. + private def getIfNewMinikubeStatus(statusString: Seq[String]): MinikubeStatus.Value = { +val hostString = statusString.find(_.contains(s"$HOST_PREFIX ")) +val kubeletString = statusString.find(_.contains(s"$KUBELET_PREFIX ")) +val apiserverString = statusString.find(_.contains(s"$APISERVER_PR
[spark] branch branch-2.3 updated: [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new 4d6ea2c [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException 4d6ea2c is described below commit 4d6ea2cbb83d365434d0650a9e5ecaefc939b97d Author: zhoukang AuthorDate: Sun Feb 3 08:45:57 2019 -0600 [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException ## What changes were proposed in this pull request? When a statement runs in the background and throws an exception which is not a HiveSQLException, we may encounter a memory leak, since the entry in handleToOperation will not be removed correctly. The reasons are below: 1. When calling operation.run() in HiveSessionImpl#executeStatementInternal, we throw an exception which is not a HiveSQLException. 2. Then the opHandle generated by SparkSQLOperationManager will not be added into the opHandleSet of HiveSessionImpl, and operationManager.closeOperation(opHandle) will not be called. 3. When we close the session, operationManager.closeOperation(opHandle) will not be called for this opHandle either, since we did not add this opHandle into the opHandleSet. For the reasons above, the opHandle will always remain in SparkSQLOperationManager#handleToOperation, which will cause a memory leak. More details and a reproduction case are attached at https://issues.apache.org/jira/browse/SPARK-26751 This patch always throws a HiveSQLException when running in the background. ## How was this patch tested? Existing UTs. Closes #23673 from caneGuy/zhoukang/fix-hivesessionimpl-leak. 
Authored-by: zhoukang Signed-off-by: Sean Owen (cherry picked from commit 255faaf3436e1f41838062ed460f801bb0be40ec) Signed-off-by: Sean Owen --- .../spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 3cfc81b..fd17f50 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -204,7 +204,7 @@ private[hive] class SparkExecuteStatementOperation( case NonFatal(e) => logError(s"Error executing query in background", e) setState(OperationState.ERROR) - throw e + throw new HiveSQLException(e) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 3d4aa5b [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException 3d4aa5b is described below commit 3d4aa5bd798a793a10c5d769a31f97242e78ce15 Author: zhoukang AuthorDate: Sun Feb 3 08:45:57 2019 -0600 [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException ## What changes were proposed in this pull request? When a statement runs in the background and throws an exception which is not a HiveSQLException, we may encounter a memory leak, since the entry in handleToOperation will not be removed correctly. The reasons are below: 1. When calling operation.run() in HiveSessionImpl#executeStatementInternal, we throw an exception which is not a HiveSQLException. 2. Then the opHandle generated by SparkSQLOperationManager will not be added into the opHandleSet of HiveSessionImpl, and operationManager.closeOperation(opHandle) will not be called. 3. When we close the session, operationManager.closeOperation(opHandle) will not be called for this opHandle either, since we did not add this opHandle into the opHandleSet. For the reasons above, the opHandle will always remain in SparkSQLOperationManager#handleToOperation, which will cause a memory leak. More details and a reproduction case are attached at https://issues.apache.org/jira/browse/SPARK-26751 This patch always throws a HiveSQLException when running in the background. ## How was this patch tested? Existing UTs. Closes #23673 from caneGuy/zhoukang/fix-hivesessionimpl-leak. 
Authored-by: zhoukang Signed-off-by: Sean Owen (cherry picked from commit 255faaf3436e1f41838062ed460f801bb0be40ec) Signed-off-by: Sean Owen --- .../spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 3cfc81b..fd17f50 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -204,7 +204,7 @@ private[hive] class SparkExecuteStatementOperation( case NonFatal(e) => logError(s"Error executing query in background", e) setState(OperationState.ERROR) - throw e + throw new HiveSQLException(e) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 255faaf [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException 255faaf is described below commit 255faaf3436e1f41838062ed460f801bb0be40ec Author: zhoukang AuthorDate: Sun Feb 3 08:45:57 2019 -0600 [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException ## What changes were proposed in this pull request? When a statement runs in the background and throws an exception which is not a HiveSQLException, we may encounter a memory leak, since the entry in handleToOperation will not be removed correctly. The reasons are below: 1. When calling operation.run() in HiveSessionImpl#executeStatementInternal, we throw an exception which is not a HiveSQLException. 2. Then the opHandle generated by SparkSQLOperationManager will not be added into the opHandleSet of HiveSessionImpl, and operationManager.closeOperation(opHandle) will not be called. 3. When we close the session, operationManager.closeOperation(opHandle) will not be called for this opHandle either, since we did not add this opHandle into the opHandleSet. For the reasons above, the opHandle will always remain in SparkSQLOperationManager#handleToOperation, which will cause a memory leak. More details and a reproduction case are attached at https://issues.apache.org/jira/browse/SPARK-26751 This patch always throws a HiveSQLException when running in the background. ## How was this patch tested? Existing UTs. Closes #23673 from caneGuy/zhoukang/fix-hivesessionimpl-leak. 
Authored-by: zhoukang Signed-off-by: Sean Owen --- .../spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 83dfa74..1772fe6 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -205,7 +205,7 @@ private[hive] class SparkExecuteStatementOperation( case NonFatal(e) => logError(s"Error executing query in background", e) setState(OperationState.ERROR) - throw e + throw new HiveSQLException(e) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26818][ML] Make MLEvents JSON ser/de safe
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new dfb8809 [SPARK-26818][ML] Make MLEvents JSON ser/de safe dfb8809 is described below commit dfb880951a8de55c587c1bf8b696df50eae6e68a Author: Hyukjin Kwon AuthorDate: Sun Feb 3 21:19:35 2019 +0800 [SPARK-26818][ML] Make MLEvents JSON ser/de safe ## What changes were proposed in this pull request? Currently, it does not look like this causes any practical problem (if I didn't misread the code). I see one place where JSON-formatted events are being used. https://github.com/apache/spark/blob/ec506bd30c2ca324c12c9ec811764081c2eb8c42/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala#L148 It's okay because it just logs when the exception is ignorable https://github.com/apache/spark/blob/9690eba16efe6d25261934d8b73a221972b684f3/core/src/main/scala/org/apache/spark/util/ListenerBus.scala#L111 I think it is best to stay safe - I don't want this unstable experimental feature to break anything in any case. It also disables `logEvent` in `SparkListenerEvent` for the same reason. This is also to match the SQL execution events side: https://github.com/apache/spark/blob/ca545f79410a464ef24e3986fac225f53bb2ef02/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L41-L57 to make ML events JSON ser/de safe. ## How was this patch tested? Manually tested, and unit tests were added. Closes #23728 from HyukjinKwon/SPARK-26818. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../main/scala/org/apache/spark/ml/events.scala| 81 +++ .../scala/org/apache/spark/ml/MLEventsSuite.scala | 112 + 2 files changed, 155 insertions(+), 38 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/events.scala b/mllib/src/main/scala/org/apache/spark/ml/events.scala index c51600f..dc4be4d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/events.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/events.scala @@ -17,6 +17,8 @@ package org.apache.spark.ml +import com.fasterxml.jackson.annotation.JsonIgnore + import org.apache.spark.SparkContext import org.apache.spark.annotation.Unstable import org.apache.spark.internal.Logging @@ -29,53 +31,84 @@ import org.apache.spark.sql.{DataFrame, Dataset} * after each operation (the event should document this). * * @note This is supported via [[Pipeline]] and [[PipelineModel]]. + * @note This is experimental and unstable. Do not use this unless you fully + * understand what `Unstable` means. */ @Unstable -sealed trait MLEvent extends SparkListenerEvent +sealed trait MLEvent extends SparkListenerEvent { + // Do not log ML events in event log. It should be revisited to see + // how it works with history server. + protected[spark] override def logEvent: Boolean = false +} /** * Event fired before `Transformer.transform`. */ @Unstable -case class TransformStart(transformer: Transformer, input: Dataset[_]) extends MLEvent +case class TransformStart() extends MLEvent { + @JsonIgnore var transformer: Transformer = _ + @JsonIgnore var input: Dataset[_] = _ +} + /** * Event fired after `Transformer.transform`. */ @Unstable -case class TransformEnd(transformer: Transformer, output: Dataset[_]) extends MLEvent +case class TransformEnd() extends MLEvent { + @JsonIgnore var transformer: Transformer = _ + @JsonIgnore var output: Dataset[_] = _ +} /** * Event fired before `Estimator.fit`. 
*/ @Unstable -case class FitStart[M <: Model[M]](estimator: Estimator[M], dataset: Dataset[_]) extends MLEvent +case class FitStart[M <: Model[M]]() extends MLEvent { + @JsonIgnore var estimator: Estimator[M] = _ + @JsonIgnore var dataset: Dataset[_] = _ +} + /** * Event fired after `Estimator.fit`. */ @Unstable -case class FitEnd[M <: Model[M]](estimator: Estimator[M], model: M) extends MLEvent +case class FitEnd[M <: Model[M]]() extends MLEvent { + @JsonIgnore var estimator: Estimator[M] = _ + @JsonIgnore var model: M = _ +} /** * Event fired before `MLReader.load`. */ @Unstable -case class LoadInstanceStart[T](reader: MLReader[T], path: String) extends MLEvent +case class LoadInstanceStart[T](path: String) extends MLEvent { + @JsonIgnore var reader: MLReader[T] = _ +} + /** * Event fired after `MLReader.load`. */ @Unstable -case class LoadInstanceEnd[T](reader: MLReader[T], instance: T) extends MLEvent +case class LoadInstanceEnd[T]() extends MLEvent { + @JsonIgnore var reader: MLReader[T] = _ + @JsonIgnore var instance: T = _ +} /** * Event fired before `MLWriter.save`. */ @Unstable -case class SaveInstanceStart(writer: MLWr