This is an automated email from the ASF dual-hosted git repository.
rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 20136c86ac [VL] Add test for scan operator with filter on decimal/timestamp/binary field (#7945)
20136c86ac is described below
commit 20136c86acb3e63bf7834e1c1a636db26c92bd88
Author: Rui Mo <[email protected]>
AuthorDate: Thu Nov 14 15:21:22 2024 +0800
[VL] Add test for scan operator with filter on decimal/timestamp/binary field (#7945)
---
.../apache/gluten/execution/VeloxScanSuite.scala | 36 ++++++++++++++++++++++
.../benchmarks/RandomParquetDataGenerator.scala | 6 ++--
2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala
index a50bffa3ed..a4f16ecc3c 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxScanSuite.scala
@@ -18,11 +18,13 @@ package org.apache.gluten.execution
import org.apache.gluten.GlutenConfig
import org.apache.gluten.backendsapi.velox.VeloxBackendSettings
+import org.apache.gluten.benchmarks.RandomParquetDataGenerator
import org.apache.gluten.utils.VeloxFileSystemValidationJniWrapper
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.GreaterThan
import org.apache.spark.sql.execution.ScalarSubquery
+import org.apache.spark.sql.types._
class VeloxScanSuite extends VeloxWholeStageTransformerSuite {
protected val rootPath: String = getClass.getResource("/").getPath
@@ -114,4 +116,38 @@ class VeloxScanSuite extends VeloxWholeStageTransformerSuite {
!VeloxFileSystemValidationJniWrapper.allSupportedByRegisteredFileSystems(
Array("file:/test_path/", "unsupported://test_path")))
}
+
+ test("scan with filter on decimal/timestamp/binary field") {
+ withTempView("t") {
+ withTempDir {
+ dir =>
+ val path = dir.getAbsolutePath
+ val schema = StructType(
+ Array(
+ StructField("short_decimal_field", DecimalType(5, 2), nullable = true),
+ StructField("long_decimal_field", DecimalType(32, 8), nullable = true),
+ StructField("binary_field", BinaryType, nullable = true),
+ StructField("timestamp_field", TimestampType, nullable = true)
+ ))
+ RandomParquetDataGenerator(0).generateRandomData(spark, schema, 10, Some(path))
+ spark.catalog.createTable("t", path, "parquet")
+
+ runQueryAndCompare(
+ """select * from t where long_decimal_field = 3.14""".stripMargin
+ )(checkGlutenOperatorMatch[FileSourceScanExecTransformer])
+
+ runQueryAndCompare(
+ """select * from t where short_decimal_field = 3.14""".stripMargin
+ )(checkGlutenOperatorMatch[FileSourceScanExecTransformer])
+
+ runQueryAndCompare(
+ """select * from t where binary_field = '3.14'""".stripMargin
+ )(checkGlutenOperatorMatch[FileSourceScanExecTransformer])
+
+ runQueryAndCompare(
+ """select * from t where timestamp_field = current_timestamp()""".stripMargin
+ )(checkGlutenOperatorMatch[FileSourceScanExecTransformer])
+ }
+ }
+ }
}
diff --git a/gluten-substrait/src/test/scala/org/apache/gluten/benchmarks/RandomParquetDataGenerator.scala b/gluten-substrait/src/test/scala/org/apache/gluten/benchmarks/RandomParquetDataGenerator.scala
index a27a4991cf..ce2f85af77 100644
--- a/gluten-substrait/src/test/scala/org/apache/gluten/benchmarks/RandomParquetDataGenerator.scala
+++ b/gluten-substrait/src/test/scala/org/apache/gluten/benchmarks/RandomParquetDataGenerator.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.types._
import com.github.javafaker.Faker
-import java.sql.Date
+import java.sql.{Date, Timestamp}
import java.util.Random
case class RandomParquetDataGenerator(initialSeed: Long = 0L) extends Logging {
@@ -67,7 +67,7 @@ case class RandomParquetDataGenerator(initialSeed: Long = 0L) extends Logging {
case DoubleType =>
faker.number().randomDouble(2, Double.MinValue.toLong, Double.MaxValue.toLong)
case DateType => new Date(faker.date().birthday().getTime)
-// case TimestampType => new Timestamp(faker.date().birthday().getTime)
+ case TimestampType => new Timestamp(faker.date().birthday().getTime)
case t: DecimalType =>
BigDecimal(
faker.number().randomDouble(t.scale, 0, Math.pow(10, t.precision - t.scale).toLong))
@@ -124,7 +124,7 @@ case class RandomParquetDataGenerator(initialSeed: Long = 0L) extends Logging {
() => StructField(fieldName, FloatType, nullable = true),
() => StructField(fieldName, DoubleType, nullable = true),
() => StructField(fieldName, DateType, nullable = true),
-// () => StructField(fieldName, TimestampType, nullable = true),
+ () => StructField(fieldName, TimestampType, nullable = true),
() => StructField(fieldName, DecimalType(10, 2), nullable = true),
() => StructField(fieldName, DecimalType(30, 10), nullable = true)
)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]