This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 5af585d4c [GLUTEN-5344][VL] Add some parquet example files from
parquet-mr for native read test (#5345)
5af585d4c is described below
commit 5af585d4ccba0dcc406e43533c607cac9e3e1975
Author: Yan Ma <[email protected]>
AuthorDate: Fri Apr 12 08:40:22 2024 +0800
[GLUTEN-5344][VL] Add some parquet example files from parquet-mr for native
read test (#5345)
---
.../parquet-for-read/test-append_1.parquet | Bin 0 -> 7375 bytes
.../parquet-for-read/test-append_2.parquet | Bin 0 -> 7374 bytes
.../test-empty-row-group_1.parquet | Bin 0 -> 191 bytes
.../test-empty-row-group_2.parquet | Bin 0 -> 675 bytes
.../test-empty-row-group_3.parquet | Bin 0 -> 781 bytes
.../test-file-with-no-column-indexes-1.parquet | Bin 0 -> 35855 bytes
.../sql/execution/VeloxParquetReadSuite.scala | 46 +++++++++++++++++++++
7 files changed, 46 insertions(+)
diff --git
a/backends-velox/src/test/resources/parquet-for-read/test-append_1.parquet
b/backends-velox/src/test/resources/parquet-for-read/test-append_1.parquet
new file mode 100644
index 000000000..a255f86eb
Binary files /dev/null and
b/backends-velox/src/test/resources/parquet-for-read/test-append_1.parquet
differ
diff --git
a/backends-velox/src/test/resources/parquet-for-read/test-append_2.parquet
b/backends-velox/src/test/resources/parquet-for-read/test-append_2.parquet
new file mode 100644
index 000000000..3081f893f
Binary files /dev/null and
b/backends-velox/src/test/resources/parquet-for-read/test-append_2.parquet
differ
diff --git
a/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_1.parquet
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_1.parquet
new file mode 100644
index 000000000..ac8c2dcff
Binary files /dev/null and
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_1.parquet
differ
diff --git
a/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_2.parquet
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_2.parquet
new file mode 100644
index 000000000..56fe96fed
Binary files /dev/null and
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_2.parquet
differ
diff --git
a/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_3.parquet
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_3.parquet
new file mode 100644
index 000000000..7efd8a81a
Binary files /dev/null and
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_3.parquet
differ
diff --git
a/backends-velox/src/test/resources/parquet-for-read/test-file-with-no-column-indexes-1.parquet
b/backends-velox/src/test/resources/parquet-for-read/test-file-with-no-column-indexes-1.parquet
new file mode 100644
index 000000000..722e687ee
Binary files /dev/null and
b/backends-velox/src/test/resources/parquet-for-read/test-file-with-no-column-indexes-1.parquet
differ
diff --git
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
new file mode 100644
index 000000000..d5828c738
--- /dev/null
+++
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import org.apache.gluten.execution.{BasicScanExecTransformer,
VeloxWholeStageTransformerSuite}
+
+import java.io.File
+
+/**
+ * Reads each example parquet file found under `resourcePath` and runs `select *` over it
+ * through `runQueryAndCompare`, passing `checkGlutenOperatorMatch[BasicScanExecTransformer]`
+ * as the plan check.
+ */
+class VeloxParquetReadSuite extends VeloxWholeStageTransformerSuite {
+  override protected val resourcePath: String = "/parquet-for-read"
+  override protected val fileFormat: String = "parquet"
+
+  // NOTE(review): the two Some("3.5") arguments presumably pin the test to Spark 3.5 only;
+  // confirm against testWithSpecifiedSparkVersion.
+  testWithSpecifiedSparkVersion("read example parquet files", Some("3.5"), Some("3.5")) {
+    withTable("test_table") {
+      // Parquet files that fail to be read by Velox for now; they are skipped below.
+      val unreadableFiles = Set("test-file-with-no-column-indexes-1.parquet")
+
+      val resource = getClass.getResource(resourcePath)
+      assert(resource != null, s"Resource directory $resourcePath not found on the classpath")
+      // Build the File from the URI rather than URL.getFile: getFile keeps percent-escapes
+      // (e.g. %20 for a space), which would point at a directory that does not exist.
+      val dir = new File(resource.toURI)
+      // listFiles yields null when `dir` is not a readable directory; treat that as empty.
+      // Sorting by name keeps the read order stable across file systems.
+      val files = Option(dir.listFiles)
+        .fold(Seq.empty[File])(_.toSeq)
+        .filterNot(f => unreadableFiles.contains(f.getName))
+        .sortBy(_.getName)
+      // Fail loudly instead of passing without having read a single file.
+      assert(files.nonEmpty, s"No example parquet files found in ${dir.getAbsolutePath}")
+
+      files.foreach {
+        file =>
+          val df = spark.read.parquet(file.getAbsolutePath)
+          df.createOrReplaceTempView("test_table")
+          runQueryAndCompare("select * from test_table") {
+            checkGlutenOperatorMatch[BasicScanExecTransformer]
+          }
+      }
+    }
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]