This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4b7b922e7 [GLUTEN-6750][CH] Fix optimize error if file mappings not loaded (#6753)
4b7b922e7 is described below
commit 4b7b922e7c2db831f5ee372702e13b52d74c4fa3
Author: Wenzheng Liu <[email protected]>
AuthorDate: Fri Aug 9 10:09:16 2024 +0800
[GLUTEN-6750][CH] Fix optimize error if file mappings not loaded (#6753)
---
.../GlutenClickHouseMergeTreeWriteOnS3Suite.scala | 43 ++++++++++++++++++++++
cpp-ch/local-engine/local_engine_jni.cpp | 2 +
2 files changed, 45 insertions(+)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala
index c95b78858..6a473cc54 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala
@@ -764,5 +764,48 @@ class GlutenClickHouseMergeTreeWriteOnS3Suite
}
}
}
+
+ test("GLUTEN-6750: Optimize error if file metadata not exist") {
+ spark.sql(s"""
+ |DROP TABLE IF EXISTS lineitem_mergetree_bucket_s3;
+ |""".stripMargin)
+
+ spark.sql(s"""
+ |CREATE TABLE IF NOT EXISTS lineitem_mergetree_bucket_s3
+ |(
+ | l_orderkey bigint,
+ | l_partkey bigint,
+ | l_suppkey bigint,
+ | l_linenumber bigint,
+ | l_quantity double,
+ | l_extendedprice double,
+ | l_discount double,
+ | l_tax double,
+ | l_returnflag string,
+ | l_linestatus string,
+ | l_shipdate date,
+ | l_commitdate date,
+ | l_receiptdate date,
+ | l_shipinstruct string,
+ | l_shipmode string,
+ | l_comment string
+ |)
+ |USING clickhouse
+ |PARTITIONED BY (l_returnflag)
+ |CLUSTERED BY (l_orderkey)
+ |${if (sparkVersion.equals("3.2")) "" else "SORTED BY (l_partkey)"} INTO 4 BUCKETS
+ |LOCATION 's3a://$BUCKET_NAME/lineitem_mergetree_bucket_s3'
+ |TBLPROPERTIES (storage_policy='__s3_main')
+ |""".stripMargin)
+
+ spark.sql(s"""
+ | insert into table lineitem_mergetree_bucket_s3
+ | select /*+ REPARTITION(3) */ * from lineitem
+ |""".stripMargin)
+
+ FileUtils.deleteDirectory(new File(S3_METADATA_PATH))
+ spark.sql("optimize lineitem_mergetree_bucket_s3")
+ spark.sql("drop table lineitem_mergetree_bucket_s3")
+ }
}
// scalastyle:off line.size.limit
diff --git a/cpp-ch/local-engine/local_engine_jni.cpp b/cpp-ch/local-engine/local_engine_jni.cpp
index c4e8ec67b..db0dd8b62 100644
--- a/cpp-ch/local-engine/local_engine_jni.cpp
+++ b/cpp-ch/local-engine/local_engine_jni.cpp
@@ -995,6 +995,8 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
// each task using its own CustomStorageMergeTree, don't reuse
auto temp_storage = local_engine::MergeTreeRelParser::copyToVirtualStorage(merge_tree_table, context);
+ // prefetch all needed parts metadata before merge
+ local_engine::restoreMetaData(temp_storage, merge_tree_table, *context);
local_engine::TempStorageFreer freer{temp_storage->getStorageID()}; // to release temp CustomStorageMergeTree with RAII
std::vector<DB::DataPartPtr> selected_parts = local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]