This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new faab553cac70 [SPARK-48060][SS][TESTS] Fix `StreamingQueryHashPartitionVerifySuite` to update golden files correctly faab553cac70 is described below commit faab553cac70eefeec286b1823b70ad62bed87f8 Author: Dongjoon Hyun <dh...@apple.com> AuthorDate: Tue Apr 30 12:50:07 2024 -0700 [SPARK-48060][SS][TESTS] Fix `StreamingQueryHashPartitionVerifySuite` to update golden files correctly ### What changes were proposed in this pull request? This PR aims to fix `StreamingQueryHashPartitionVerifySuite` to update golden files correctly. - The documentation is added. - Newly generated files are updated. ### Why are the changes needed? Previously, `SPARK_GENERATE_GOLDEN_FILES` didn't work as expected because it updated the files under the `target` directory. We need to update the `src/test` files. **BEFORE** ``` $ SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" $ git status On branch master Your branch is up to date with 'apache/master'. nothing to commit, working tree clean ``` **AFTER** ``` $ SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" \ -Dspark.sql.test.randomDataGenerator.maxStrLen=100 \ -Dspark.sql.test.randomDataGenerator.maxArraySize=4 $ git status On branch SPARK-48060 Your branch is up to date with 'dongjoon/SPARK-48060'. Changes not staged for commit: (use "git add <file>..." to update what will be committed) (use "git restore <file>..." to discard changes in working directory) modified: sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas modified: sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds no changes added to commit (use "git add" and/or "git commit -a") ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. I regenerated the data as follows. 
``` $ SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" \ -Dspark.sql.test.randomDataGenerator.maxStrLen=100 \ -Dspark.sql.test.randomDataGenerator.maxArraySize=4 ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46304 from dongjoon-hyun/SPARK-48060. Authored-by: Dongjoon Hyun <dh...@apple.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../partition-tests/randomSchemas | 2 +- .../partition-tests/rowsAndPartIds | Bin 4862115 -> 13341426 bytes .../StreamingQueryHashPartitionVerifySuite.scala | 22 +++++++++++++++------ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas b/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas index 8d6ff942610c..f6eadd776cc6 100644 --- a/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas +++ b/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas @@ -1 +1 @@ -col_0 STRUCT<col_0: BINARY, col_1: BIGINT NOT NULL, col_2: ARRAY<DOUBLE> NOT NULL, col_3: FLOAT NOT NULL, col_4: INT NOT NULL>,col_1 STRUCT<col_0: STRING, col_1: TIMESTAMP NOT NULL, col_2: STRUCT<col_0: FLOAT NOT NULL>, col_3: ARRAY<INT> NOT NULL, col_4: ARRAY<BINARY>, col_5: TIMESTAMP NOT NULL, col_6: STRUCT<col_0: ARRAY<DOUBLE>, col_1: BIGINT NOT NULL> NOT NULL, col_7: ARRAY<INT> NOT NULL, col_8: ARRAY<BIGINT>, col_9: BIGINT NOT NULL> NOT NULL,col_2 BIGINT NOT NULL,col_3 STRUCT<col_0: [...] 
+col_0 ARRAY<BINARY>,col_1 STRUCT<col_0: STRING> NOT NULL,col_2 STRING NOT NULL,col_3 STRUCT<col_0: INT, col_1: ARRAY<STRING>, col_2: ARRAY<DOUBLE> NOT NULL> NOT NULL,col_4 BINARY NOT NULL,col_5 ARRAY<BINARY> NOT NULL,col_6 ARRAY<FLOAT>,col_7 DOUBLE NOT NULL,col_8 ARRAY<DOUBLE> NOT NULL,col_9 ARRAY<TIMESTAMP>,col_10 FLOAT NOT NULL,col_11 STRUCT<col_0: STRUCT<col_0: ARRAY<TIMESTAMP> NOT NULL>, col_1: STRUCT<col_0: ARRAY<STRING> NOT NULL, col_1: INT, col_2: STRUCT<col_0: STRUCT<col_0: STRIN [...] diff --git a/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds b/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds index 3902d6d7d5f6..1b2eda8502e5 100644 Binary files a/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds and b/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds differ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala index 3423b8b8cb28..3d8c20af3b38 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala @@ -31,11 +31,24 @@ import org.apache.spark.sql.catalyst.expressions.{BoundReference, GenericInterna import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.types.{BinaryType, DataType, DoubleType, FloatType, IntegerType, LongType, StringType, StructType, TimestampType} +/** + * To run the test suite: + * {{{ + * build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" + * }}} + * + * To re-generate the golden file with size limit under 10Mb, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" + * 
-Dspark.sql.test.randomDataGenerator.maxStrLen=100 + * -Dspark.sql.test.randomDataGenerator.maxArraySize=4 + * }}} + */ class StreamingQueryHashPartitionVerifySuite extends StreamTest { - // Configs for golden file - private val goldenFileURI = - this.getClass.getResource("/structured-streaming/partition-tests/").toURI + // A golden file directory in `src/test` instead of `target` directory. + private val goldenFileURI = getWorkspaceFilePath( + "sql", "core", "src", "test", "resources", "structured-streaming", "partition-tests").toUri private val schemaFileName = "randomSchemas" // files for storing random input schemas private val rowAndPartIdFilename = @@ -152,9 +165,6 @@ class StreamingQueryHashPartitionVerifySuite extends StreamTest { val rowAndPartIdFile = new File(goldenFileURI.getPath, rowAndPartIdFilename) if (regenerateGoldenFiles) { - // To limit the golden file size under 10Mb, please set the final val MAX_STR_LEN: Int = 100 - // and final val MAX_ARR_SIZE: Int = 4 in org.apache.spark.sql.RandomDataGenerator - val random = new Random() val schemas = getRandomSchemas(random) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org