This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 5af585d4c [GLUTEN-5344][VL] Add some parquet example files from parquet-mr for native read test (#5345)
5af585d4c is described below

commit 5af585d4ccba0dcc406e43533c607cac9e3e1975
Author: Yan Ma <[email protected]>
AuthorDate: Fri Apr 12 08:40:22 2024 +0800

    [GLUTEN-5344][VL] Add some parquet example files from parquet-mr for native read test (#5345)
---
 .../parquet-for-read/test-append_1.parquet         | Bin 0 -> 7375 bytes
 .../parquet-for-read/test-append_2.parquet         | Bin 0 -> 7374 bytes
 .../test-empty-row-group_1.parquet                 | Bin 0 -> 191 bytes
 .../test-empty-row-group_2.parquet                 | Bin 0 -> 675 bytes
 .../test-empty-row-group_3.parquet                 | Bin 0 -> 781 bytes
 .../test-file-with-no-column-indexes-1.parquet     | Bin 0 -> 35855 bytes
 .../sql/execution/VeloxParquetReadSuite.scala      |  46 +++++++++++++++++++++
 7 files changed, 46 insertions(+)

diff --git 
a/backends-velox/src/test/resources/parquet-for-read/test-append_1.parquet 
b/backends-velox/src/test/resources/parquet-for-read/test-append_1.parquet
new file mode 100644
index 000000000..a255f86eb
Binary files /dev/null and 
b/backends-velox/src/test/resources/parquet-for-read/test-append_1.parquet 
differ
diff --git 
a/backends-velox/src/test/resources/parquet-for-read/test-append_2.parquet 
b/backends-velox/src/test/resources/parquet-for-read/test-append_2.parquet
new file mode 100644
index 000000000..3081f893f
Binary files /dev/null and 
b/backends-velox/src/test/resources/parquet-for-read/test-append_2.parquet 
differ
diff --git 
a/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_1.parquet
 
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_1.parquet
new file mode 100644
index 000000000..ac8c2dcff
Binary files /dev/null and 
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_1.parquet
 differ
diff --git 
a/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_2.parquet
 
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_2.parquet
new file mode 100644
index 000000000..56fe96fed
Binary files /dev/null and 
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_2.parquet
 differ
diff --git 
a/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_3.parquet
 
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_3.parquet
new file mode 100644
index 000000000..7efd8a81a
Binary files /dev/null and 
b/backends-velox/src/test/resources/parquet-for-read/test-empty-row-group_3.parquet
 differ
diff --git 
a/backends-velox/src/test/resources/parquet-for-read/test-file-with-no-column-indexes-1.parquet
 
b/backends-velox/src/test/resources/parquet-for-read/test-file-with-no-column-indexes-1.parquet
new file mode 100644
index 000000000..722e687ee
Binary files /dev/null and 
b/backends-velox/src/test/resources/parquet-for-read/test-file-with-no-column-indexes-1.parquet
 differ
diff --git 
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
 
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
new file mode 100644
index 000000000..d5828c738
--- /dev/null
+++ 
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import org.apache.gluten.execution.{BasicScanExecTransformer, 
VeloxWholeStageTransformerSuite}
+
+import java.io.File
+
+/**
+ * Reads every example parquet file found under `resourcePath`, registers it as the temp view
+ * `test_table`, and runs `select * from test_table` through `runQueryAndCompare`, checking
+ * that the plan contains a `BasicScanExecTransformer`.
+ *
+ * The test fails (rather than silently passing) when the resource directory is missing,
+ * cannot be listed, or contains no file to read.
+ */
+class VeloxParquetReadSuite extends VeloxWholeStageTransformerSuite {
+  override protected val resourcePath: String = "/parquet-for-read"
+  override protected val fileFormat: String = "parquet"
+
+  // Exclude parquet files failed to read by velox for now
+  private val excludedFiles: Set[String] = Set("test-file-with-no-column-indexes-1.parquet")
+
+  testWithSpecifiedSparkVersion("read example parquet files", Some("3.5"), Some("3.5")) {
+    withTable("test_table") {
+      val resourceUrl = getClass.getResource(resourcePath)
+      assert(resourceUrl != null, s"Test resource directory $resourcePath not found")
+      // File(URI) decodes percent-escapes (e.g. spaces in the path) that URL.getFile keeps.
+      val dir = new File(resourceUrl.toURI)
+      // listFiles returns null when `dir` is not a listable directory; fail loudly then.
+      val files = Option(dir.listFiles).getOrElse(fail(s"Cannot list files under $dir"))
+      val candidates = files.filterNot(f => excludedFiles.contains(f.getName))
+      assert(candidates.nonEmpty, s"No example files to read under $dir")
+      candidates.foreach {
+        file =>
+          val df = spark.read.parquet(file.getAbsolutePath)
+          df.createOrReplaceTempView("test_table")
+          runQueryAndCompare("select * from test_table") {
+            checkGlutenOperatorMatch[BasicScanExecTransformer]
+          }
+      }
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to