This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6fba9e2 [SPARK-37951][MLLIB][K8S] Move test file from ../data/ to corresponding module's resource folder 6fba9e2 is described below commit 6fba9e27f6b7743fd582a44f87c6d77b165af58f Author: Angerszhuuuu <angers....@gmail.com> AuthorDate: Wed Jan 19 17:01:13 2022 +0800 [SPARK-37951][MLLIB][K8S] Move test file from ../data/ to corresponding module's resource folder ### What changes were proposed in this pull request? Move test file from `data/` dir to corresponding module's resource folder 1. move `../data/mllib/images/partitioned` to mllib's `resources/images/partitioned` 2. move `../data/mllib/iris_libsvm.txt` to mllib's `resources/iris_libsvm.txt` 3. copy `data/mllib/pagerank_data.txt` to kubernetes-integration-test's `resources/pagerank_data.txt` ### Why are the changes needed? Refactor code to avoid test failure ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #35237 from AngersZhuuuu/SPARK-37951. 
Authored-by: Angerszhuuuu <angers....@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/ml/source/image/ImageDataSource.scala | 4 ++-- .../cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg | Bin .../partitioned/cls=kittens/date=2018-01/not-image.txt | 0 .../images/partitioned/cls=kittens/date=2018-02/54893.jpg | Bin .../partitioned/cls=kittens/date=2018-02/DP153539.jpg | Bin .../partitioned/cls=kittens/date=2018-02/DP802813.jpg | Bin .../partitioned/cls=multichannel/date=2018-01/BGRA.png | Bin .../cls=multichannel/date=2018-01/BGRA_alpha_60.png | Bin .../cls=multichannel/date=2018-02/chr30.4.184.jpg | Bin .../partitioned/cls=multichannel/date=2018-02/grayscale.jpg | Bin {data/mllib => mllib/src/test/resources}/iris_libsvm.txt | 0 .../spark/ml/evaluation/ClusteringEvaluatorSuite.scala | 2 +- .../apache/spark/ml/source/image/ImageFileFormatSuite.scala | 3 +-- .../integration-tests/src/test/resources/pagerank_data.txt | 6 ++++++ .../spark/deploy/k8s/integrationtest/BasicTestsSuite.scala | 6 +++--- 15 files changed, 13 insertions(+), 8 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala b/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala index d4d7408..9413d99 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/image/ImageDataSource.scala @@ -37,12 +37,12 @@ package org.apache.spark.ml.source.image * // Scala * val df = spark.read.format("image") * .option("dropInvalid", true) - * .load("data/mllib/images/partitioned") + * .load("/path/to/images") * * // Java * Dataset<Row> df = spark.read().format("image") * .option("dropInvalid", true) - * .load("data/mllib/images/partitioned"); + * .load("/path/to/images"); * }}} * * Image data source supports the following options: diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg 
b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/not-image.txt similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-01/not-image.txt diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/54893.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/54893.jpg diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg diff --git a/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg b/mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg rename to mllib/src/test/resources/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA.png similarity index 100% rename from 
data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA.png diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg diff --git a/data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg b/mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg similarity index 100% rename from data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg rename to mllib/src/test/resources/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg diff --git a/data/mllib/iris_libsvm.txt b/mllib/src/test/resources/iris_libsvm.txt similarity index 100% rename from data/mllib/iris_libsvm.txt rename to mllib/src/test/resources/iris_libsvm.txt diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala index 06f2cb2..baeebfb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/ClusteringEvaluatorSuite.scala @@ -40,7 +40,7 @@ class ClusteringEvaluatorSuite override def beforeAll(): Unit 
= { super.beforeAll() - irisDataset = spark.read.format("libsvm").load("../data/mllib/iris_libsvm.txt") + irisDataset = spark.read.format("libsvm").load(getTestResourcePath("iris_libsvm.txt")) val datasets = MLTestingUtils.generateArrayFeatureDataset(irisDataset) newIrisDataset = datasets._1 newIrisDatasetD = datasets._2 diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala index 0ec2747..10b9bbb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala @@ -29,8 +29,7 @@ import org.apache.spark.sql.functions.{col, substring_index} class ImageFileFormatSuite extends SparkFunSuite with MLlibTestSparkContext { // Single column of images named "image" - private lazy val imagePath = "../data/mllib/images/partitioned" - private lazy val recursiveImagePath = "../data/mllib/images" + private lazy val imagePath = getTestResourcePath("images/partitioned") test("Smoke test: create basic ImageSchema dataframe") { val origin = "path" diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt b/resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt new file mode 100644 index 0000000..95755ab --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/pagerank_data.txt @@ -0,0 +1,6 @@ +1 2 +1 3 +1 4 +2 1 +3 1 +4 1 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala index 6db4bee..d704ef7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.Pod import org.scalatest.concurrent.Eventually import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.TestUtils +import org.apache.spark.{SparkFunSuite, TestUtils} import org.apache.spark.launcher.SparkLauncher private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => @@ -126,11 +126,11 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => } } -private[spark] object BasicTestsSuite { +private[spark] object BasicTestsSuite extends SparkFunSuite { val SPARK_PAGE_RANK_MAIN_CLASS: String = "org.apache.spark.examples.SparkPageRank" val CONTAINER_LOCAL_FILE_DOWNLOAD_PATH = "/var/spark-data/spark-files" val CONTAINER_LOCAL_DOWNLOADED_PAGE_RANK_DATA_FILE = s"$CONTAINER_LOCAL_FILE_DOWNLOAD_PATH/pagerank_data.txt" - val REMOTE_PAGE_RANK_DATA_FILE = "data/mllib/pagerank_data.txt" + val REMOTE_PAGE_RANK_DATA_FILE = getTestResourcePath("pagerank_data.txt") val REMOTE_PAGE_RANK_FILE_NAME = "pagerank_data.txt" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org