This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 3ad22eb86 chore: Refactor Parquet/DataFrame fuzz data generators (#2629)
3ad22eb86 is described below
commit 3ad22eb865bbc990b0d9d103833c33f13a7af829
Author: Andy Grove <[email protected]>
AuthorDate: Wed Oct 22 12:59:04 2025 -0600
chore: Refactor Parquet/DataFrame fuzz data generators (#2629)
---
docs/source/user-guide/latest/configs.md | 1 +
.../main/scala/org/apache/comet/fuzz/Main.scala | 16 ++--
.../apache/comet/testing/FuzzDataGenerator.scala | 91 ++++++++----------
.../apache/comet/testing/ParquetGenerator.scala | 20 +++-
.../apache/comet/CometArrayExpressionSuite.scala | 106 ++++++++-------------
.../apache/comet/CometBitwiseExpressionSuite.scala | 18 +---
.../scala/org/apache/comet/CometFuzzTestBase.scala | 25 +++--
.../org/apache/comet/CometFuzzTestSuite.scala | 19 ++--
.../org/apache/comet/CometMapExpressionSuite.scala | 19 ++--
.../apache/comet/exec/CometAggregateSuite.scala | 10 +-
.../org/apache/comet/exec/CometExecSuite.scala | 19 ++--
11 files changed, 171 insertions(+), 173 deletions(-)
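
Summary of the refactor: the old all-in-one DataGenOptions is split into two case classes. SchemaGenOptions now controls the shape of the generated schema (the primitive type list plus optional arrays, structs, and maps), while DataGenOptions covers only value generation (allowNull, generateNegativeZero, baseDate). A minimal sketch of the new call pattern, assuming a SparkSession named `spark` is in scope (the output path is illustrative):

    import scala.util.Random

    import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}

    val r = new Random(42)
    // Schema shape and value generation are now configured independently.
    ParquetGenerator.makeParquetFile(
      r,
      spark,
      "test0.parquet", // illustrative output path
      100, // numRows
      SchemaGenOptions(generateArray = true, generateStruct = true),
      DataGenOptions(allowNull = true, generateNegativeZero = false))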
diff --git a/docs/source/user-guide/latest/configs.md b/docs/source/user-guide/latest/configs.md
index c4c334372..bc8fdcb98 100644
--- a/docs/source/user-guide/latest/configs.md
+++ b/docs/source/user-guide/latest/configs.md
@@ -268,6 +268,7 @@ These settings can be used to determine which parts of the plan are accelerated
| `spark.comet.expression.Reverse.enabled` | Enable Comet acceleration for `Reverse` | true |
| `spark.comet.expression.Round.enabled` | Enable Comet acceleration for `Round` | true |
| `spark.comet.expression.Second.enabled` | Enable Comet acceleration for `Second` | true |
+| `spark.comet.expression.Sha1.enabled` | Enable Comet acceleration for `Sha1` | true |
| `spark.comet.expression.Sha2.enabled` | Enable Comet acceleration for `Sha2` | true |
| `spark.comet.expression.ShiftLeft.enabled` | Enable Comet acceleration for `ShiftLeft` | true |
| `spark.comet.expression.ShiftRight.enabled` | Enable Comet acceleration for `ShiftRight` | true |
diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Main.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Main.scala
index 9b9a4b6f3..1f81dc779 100644
--- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Main.scala
+++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/Main.scala
@@ -26,7 +26,7 @@ import org.rogach.scallop.ScallopOption
import org.apache.spark.sql.SparkSession
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class Conf(arguments: Seq[String]) extends ScallopConf(arguments) {
object generateData extends Subcommand("data") {
@@ -78,19 +78,19 @@ object Main {
case Some(seed) => new Random(seed)
case None => new Random()
}
- val options = DataGenOptions(
- allowNull = true,
- generateArray = conf.generateData.generateArrays(),
- generateStruct = conf.generateData.generateStructs(),
- generateMap = conf.generateData.generateMaps(),
- generateNegativeZero = !conf.generateData.excludeNegativeZero())
for (i <- 0 until conf.generateData.numFiles()) {
ParquetGenerator.makeParquetFile(
r,
spark,
s"test$i.parquet",
numRows = conf.generateData.numRows(),
- options)
+ SchemaGenOptions(
+ generateArray = conf.generateData.generateArrays(),
+ generateStruct = conf.generateData.generateStructs(),
+ generateMap = conf.generateData.generateMaps()),
+ DataGenOptions(
+ allowNull = true,
+ generateNegativeZero = !conf.generateData.excludeNegativeZero()))
}
case Some(conf.generateQueries) =>
val r = conf.generateQueries.randomSeed.toOption match {
diff --git a/spark/src/main/scala/org/apache/comet/testing/FuzzDataGenerator.scala b/spark/src/main/scala/org/apache/comet/testing/FuzzDataGenerator.scala
index 7c7a6727f..087221e1a 100644
--- a/spark/src/main/scala/org/apache/comet/testing/FuzzDataGenerator.scala
+++ b/spark/src/main/scala/org/apache/comet/testing/FuzzDataGenerator.scala
@@ -44,50 +44,16 @@ object FuzzDataGenerator {
val defaultBaseDate: Long =
    new SimpleDateFormat("YYYY-MM-DD hh:mm:ss").parse("3333-05-25 12:34:56").getTime
- private val primitiveTypes = Seq(
- DataTypes.BooleanType,
- DataTypes.ByteType,
- DataTypes.ShortType,
- DataTypes.IntegerType,
- DataTypes.LongType,
- DataTypes.FloatType,
- DataTypes.DoubleType,
- DataTypes.createDecimalType(10, 2),
- DataTypes.createDecimalType(36, 18),
- DataTypes.DateType,
- DataTypes.TimestampType,
- DataTypes.TimestampNTZType,
- DataTypes.StringType,
- DataTypes.BinaryType)
-
- private def filteredPrimitives(excludeTypes: Seq[DataType]) = {
-
- primitiveTypes.filterNot { dataType =>
- excludeTypes.exists {
- case _: DecimalType =>
-          // For DecimalType, match if the type is also a DecimalType (ignore precision/scale)
- dataType.isInstanceOf[DecimalType]
- case excludeType =>
- dataType == excludeType
- }
- }
- }
-
- def generateDataFrame(
- r: Random,
- spark: SparkSession,
- numRows: Int,
- options: DataGenOptions): DataFrame = {
-
- val filteredPrimitiveTypes = filteredPrimitives(options.excludeTypes)
+ def generateSchema(options: SchemaGenOptions): StructType = {
+ val primitiveTypes = options.primitiveTypes
val dataTypes = ListBuffer[DataType]()
- dataTypes.appendAll(filteredPrimitiveTypes)
+ dataTypes.appendAll(primitiveTypes)
-    val arraysOfPrimitives = filteredPrimitiveTypes.map(DataTypes.createArrayType)
+ val arraysOfPrimitives = primitiveTypes.map(DataTypes.createArrayType)
if (options.generateStruct) {
- dataTypes += StructType(filteredPrimitiveTypes.zipWithIndex.map(x =>
- StructField(s"c${x._2}", x._1, nullable = true)))
+ dataTypes += StructType(
+        primitiveTypes.zipWithIndex.map(x => StructField(s"c${x._2}", x._1, nullable = true)))
if (options.generateArray) {
dataTypes += StructType(arraysOfPrimitives.zipWithIndex.map(x =>
@@ -103,9 +69,8 @@ object FuzzDataGenerator {
dataTypes.appendAll(arraysOfPrimitives)
if (options.generateStruct) {
- dataTypes += DataTypes.createArrayType(
- StructType(filteredPrimitiveTypes.zipWithIndex.map(x =>
- StructField(s"c${x._2}", x._1, nullable = true))))
+      dataTypes += DataTypes.createArrayType(StructType(primitiveTypes.zipWithIndex.map(x =>
+ StructField(s"c${x._2}", x._1, nullable = true))))
}
if (options.generateMap) {
@@ -117,7 +82,15 @@ object FuzzDataGenerator {
// generate schema using random data types
val fields = dataTypes.zipWithIndex
.map(i => StructField(s"c${i._2}", i._1, nullable = true))
- val schema = StructType(fields.toSeq)
+ StructType(fields.toSeq)
+ }
+
+ def generateDataFrame(
+ r: Random,
+ spark: SparkSession,
+ schema: StructType,
+ numRows: Int,
+ options: DataGenOptions): DataFrame = {
// generate columnar data
val cols: Seq[Seq[Any]] =
@@ -247,11 +220,31 @@ object FuzzDataGenerator {
}
}
-case class DataGenOptions(
- allowNull: Boolean = true,
- generateNegativeZero: Boolean = true,
- baseDate: Long = FuzzDataGenerator.defaultBaseDate,
+object SchemaGenOptions {
+ val defaultPrimitiveTypes: Seq[DataType] = Seq(
+ DataTypes.BooleanType,
+ DataTypes.ByteType,
+ DataTypes.ShortType,
+ DataTypes.IntegerType,
+ DataTypes.LongType,
+ DataTypes.FloatType,
+ DataTypes.DoubleType,
+ DataTypes.createDecimalType(10, 2),
+ DataTypes.createDecimalType(36, 18),
+ DataTypes.DateType,
+ DataTypes.TimestampType,
+ DataTypes.TimestampNTZType,
+ DataTypes.StringType,
+ DataTypes.BinaryType)
+}
+
+case class SchemaGenOptions(
generateArray: Boolean = false,
generateStruct: Boolean = false,
generateMap: Boolean = false,
- excludeTypes: Seq[DataType] = Seq.empty)
+ primitiveTypes: Seq[DataType] = SchemaGenOptions.defaultPrimitiveTypes)
+
+case class DataGenOptions(
+ allowNull: Boolean = true,
+ generateNegativeZero: Boolean = true,
+ baseDate: Long = FuzzDataGenerator.defaultBaseDate)
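
Note that schema generation is now a standalone step: FuzzDataGenerator.generateSchema builds a StructType from SchemaGenOptions, and generateDataFrame takes that schema explicitly. A minimal sketch of the two-step flow, assuming a SparkSession named `spark`:

    import scala.util.Random

    import org.apache.comet.testing.{DataGenOptions, FuzzDataGenerator, SchemaGenOptions}

    // Step 1: derive a random schema from the type options.
    val schema = FuzzDataGenerator.generateSchema(SchemaGenOptions(generateArray = true))
    // Step 2: populate that schema with random data.
    val df = FuzzDataGenerator.generateDataFrame(new Random(42), spark, schema, 1000, DataGenOptions())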
diff --git a/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala b/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
index 27e40c9d7..a43100a69 100644
--- a/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
+++ b/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
@@ -22,18 +22,32 @@ package org.apache.comet.testing
import scala.util.Random
import org.apache.spark.sql.{SaveMode, SparkSession}
+import org.apache.spark.sql.types.StructType
object ParquetGenerator {
+ /** Generate a Parquet file using a generated schema */
def makeParquetFile(
r: Random,
spark: SparkSession,
filename: String,
numRows: Int,
- options: DataGenOptions): Unit = {
-
- val df = FuzzDataGenerator.generateDataFrame(r, spark, numRows, options)
+ schemaGenOptions: SchemaGenOptions,
+ dataGenOptions: DataGenOptions): Unit = {
+ val schema = FuzzDataGenerator.generateSchema(schemaGenOptions)
+ makeParquetFile(r, spark, filename, schema, numRows, dataGenOptions)
+ }
+ /** Generate a Parquet file using the provided schema */
+ def makeParquetFile(
+ r: Random,
+ spark: SparkSession,
+ filename: String,
+ schema: StructType,
+ numRows: Int,
+ options: DataGenOptions): Unit = {
+    val df = FuzzDataGenerator.generateDataFrame(r, spark, schema, numRows, options)
df.write.mode(SaveMode.Overwrite).parquet(filename)
}
+
}
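
ParquetGenerator now exposes two overloads: the original entry point, which generates a random schema from SchemaGenOptions, and a new one that accepts a caller-supplied StructType, so a test can pin the schema while varying the data. A short sketch of the second overload, assuming `spark` is in scope (the schema and path are illustrative):

    import scala.util.Random

    import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

    import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}

    // Illustrative fixed schema: a single nullable int column.
    val fixedSchema = StructType(Seq(StructField("c0", DataTypes.IntegerType, nullable = true)))
    ParquetGenerator.makeParquetFile(
      new Random(1), spark, "fixed.parquet", fixedSchema, 100, DataGenOptions())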
diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
index 2adb7a9ed..c346dc2e9 100644
--- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.functions._
import org.apache.comet.CometSparkSessionExtensions.{isSpark35Plus, isSpark40Plus}
import org.apache.comet.DataTypeSupport.isComplexType
import org.apache.comet.serde.{CometArrayExcept, CometArrayRemove, CometArrayReverse, CometFlatten}
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
@@ -64,12 +64,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = false,
- generateStruct = false,
- generateMap = false))
+      SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
val table = spark.read.parquet(filename)
table.createOrReplaceTempView("t1")
@@ -95,13 +91,13 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val options = DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = false)
-        ParquetGenerator.makeParquetFile(random, spark, filename, 100, options)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 100,
+          SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
withSQLConf(
CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
@@ -266,12 +262,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = false))
+      SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
val table = spark.read.parquet(filename)
table.createOrReplaceTempView("t1")
@@ -310,12 +302,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = false,
- generateStruct = false,
- generateMap = false))
+      SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
val table = spark.read.parquet(filename)
table.createOrReplaceTempView("t2")
@@ -340,12 +328,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = false))
+      SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
withSQLConf(
CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
@@ -588,12 +572,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = false,
- generateStruct = false,
- generateMap = false))
+      SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
withSQLConf(CometConf.COMET_EXPR_ALLOW_INCOMPATIBLE.key -> "true") {
withTempView("t1", "t2") {
@@ -622,13 +602,13 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val options = DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = false)
- ParquetGenerator.makeParquetFile(random, spark, filename, 100, options)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 100,
+          SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
withSQLConf(
CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
@@ -692,12 +672,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = false,
- generateStruct = false,
- generateMap = false))
+      SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
val table = spark.read.parquet(filename)
table.createOrReplaceTempView("t1")
@@ -720,13 +696,13 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val options = DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = false)
- ParquetGenerator.makeParquetFile(random, spark, filename, 100, options)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 100,
+          SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
withSQLConf(
CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
@@ -773,13 +749,13 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val options = DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = false)
- ParquetGenerator.makeParquetFile(random, spark, filename, 100, options)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 100,
+          SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
withSQLConf(
CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
diff --git a/spark/src/test/scala/org/apache/comet/CometBitwiseExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometBitwiseExpressionSuite.scala
index d89e81b0f..02c003ede 100644
--- a/spark/src/test/scala/org/apache/comet/CometBitwiseExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometBitwiseExpressionSuite.scala
@@ -25,7 +25,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.CometTestBase
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class CometBitwiseExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
@@ -99,12 +99,8 @@ class CometBitwiseExpressionSuite extends CometTestBase with AdaptiveSparkPlanHe
spark,
filename,
100,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = false,
- generateStruct = false,
- generateMap = false))
+      SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
val table = spark.read.parquet(filename)
checkSparkAnswerAndOperator(
@@ -174,12 +170,8 @@ class CometBitwiseExpressionSuite extends CometTestBase with AdaptiveSparkPlanHe
spark,
filename,
10,
- DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = false,
- generateStruct = false,
- generateMap = false))
+      SchemaGenOptions(generateArray = false, generateStruct = false, generateMap = false),
+ DataGenOptions(allowNull = true, generateNegativeZero = true))
}
val table = spark.read.parquet(filename)
val df =
diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestBase.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestBase.scala
index a69080e44..1c0636780 100644
--- a/spark/src/test/scala/org/apache/comet/CometFuzzTestBase.scala
+++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestBase.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.internal.SQLConf
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class CometFuzzTestBase extends CometTestBase with AdaptiveSparkPlanHelper {
@@ -58,15 +58,20 @@ class CometFuzzTestBase extends CometTestBase with AdaptiveSparkPlanHelper {
withSQLConf(
CometConf.COMET_ENABLED.key -> "false",
SQLConf.SESSION_LOCAL_TIMEZONE.key -> defaultTimezone) {
- val options =
- DataGenOptions(
- generateArray = true,
- generateStruct = true,
- generateNegativeZero = false,
- // override base date due to known issues with experimental scans
- baseDate =
-            new SimpleDateFormat("YYYY-MM-DD hh:mm:ss").parse("2024-05-25 12:34:56").getTime)
- ParquetGenerator.makeParquetFile(random, spark, filename, 1000, options)
+ val schemaGenOptions =
+ SchemaGenOptions(generateArray = true, generateStruct = true)
+ val dataGenOptions = DataGenOptions(
+ generateNegativeZero = false,
+ // override base date due to known issues with experimental scans
+ baseDate =
+            new SimpleDateFormat("YYYY-MM-DD hh:mm:ss").parse("2024-05-25 12:34:56").getTime)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 1000,
+ schemaGenOptions,
+ dataGenOptions)
}
}
diff --git a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala
index 398d96013..006112d2b 100644
--- a/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometFuzzTestSuite.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType
import org.apache.spark.sql.types._
import org.apache.comet.DataTypeSupport.isComplexType
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class CometFuzzTestSuite extends CometFuzzTestBase {
@@ -261,11 +261,10 @@ class CometFuzzTestSuite extends CometFuzzTestBase {
generateArray: Boolean = true,
generateStruct: Boolean = true): Unit = {
- val options =
- DataGenOptions(
- generateArray = generateArray,
- generateStruct = generateStruct,
- generateNegativeZero = false)
+ val schemaGenOptions =
+      SchemaGenOptions(generateArray = generateArray, generateStruct = generateStruct)
+
+ val dataGenOptions = DataGenOptions(generateNegativeZero = false)
withTempPath { filename =>
val random = new Random(42)
@@ -273,7 +272,13 @@ class CometFuzzTestSuite extends CometFuzzTestBase {
CometConf.COMET_ENABLED.key -> "false",
        SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outputTimestampType.toString,
SQLConf.SESSION_LOCAL_TIMEZONE.key -> defaultTimezone) {
-        ParquetGenerator.makeParquetFile(random, spark, filename.toString, 100, options)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename.toString,
+ 100,
+ schemaGenOptions,
+ dataGenOptions)
}
Seq(defaultTimezone, "UTC", "America/Denver").foreach { tz =>
diff --git a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala
index 3557ad348..88c13391a 100644
--- a/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.CometTestBase
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class CometMapExpressionSuite extends CometTestBase {
@@ -108,13 +108,16 @@ class CometMapExpressionSuite extends CometTestBase {
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val options = DataGenOptions(
- allowNull = false,
- generateNegativeZero = false,
- generateArray = true,
- generateStruct = false,
- generateMap = false)
- ParquetGenerator.makeParquetFile(random, spark, filename, 100, options)
+ val schemaGenOptions =
+          SchemaGenOptions(generateArray = true, generateStruct = false, generateMap = false)
+        val dataGenOptions = DataGenOptions(allowNull = false, generateNegativeZero = false)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 100,
+ schemaGenOptions,
+ dataGenOptions)
}
spark.read.parquet(filename).createOrReplaceTempView("t1")
      val df = spark.sql("SELECT map_from_arrays(array(c12), array(c3)) FROM t1")
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
index d0b1dfb36..211cc16d0 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.functions.{avg, count_distinct, sum}
import org.apache.spark.sql.internal.SQLConf
import org.apache.comet.CometConf
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
/**
* Test suite dedicated to Comet native aggregate operator
@@ -45,7 +45,13 @@ class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
-      ParquetGenerator.makeParquetFile(random, spark, filename, 10000, DataGenOptions())
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 10000,
+ SchemaGenOptions(),
+ DataGenOptions())
}
val tableName = "avg_decimal"
withTable(tableName) {
diff --git a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
index ab7081e10..1b15c39ca 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometExecSuite.scala
@@ -50,7 +50,7 @@ import org.apache.spark.unsafe.types.UTF8String
import org.apache.comet.{CometConf, ExtendedExplainInfo}
import org.apache.comet.CometSparkSessionExtensions.{isSpark35Plus, isSpark40Plus}
-import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator, SchemaGenOptions}
class CometExecSuite extends CometTestBase {
@@ -2052,13 +2052,16 @@ class CometExecSuite extends CometTestBase {
val filename = path.toString
val random = new Random(42)
withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val options = DataGenOptions(
- allowNull = true,
- generateNegativeZero = true,
- generateArray = true,
- generateStruct = true,
- generateMap = true)
- ParquetGenerator.makeParquetFile(random, spark, filename, 100, options)
+ val schemaGenOptions =
+        SchemaGenOptions(generateArray = true, generateStruct = true, generateMap = true)
+      val dataGenOptions = DataGenOptions(allowNull = true, generateNegativeZero = true)
+ ParquetGenerator.makeParquetFile(
+ random,
+ spark,
+ filename,
+ 100,
+ schemaGenOptions,
+ dataGenOptions)
}
withSQLConf(
CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]