This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 4b7b922e7 [GLUTEN-6750][CH] Fix optimize error if file mappings not loaded (#6753)
4b7b922e7 is described below
commit 4b7b922e7c2db831f5ee372702e13b52d74c4fa3
Author: Wenzheng Liu <[email protected]>
AuthorDate: Fri Aug 9 10:09:16 2024 +0800
[GLUTEN-6750][CH] Fix optimize error if file mappings not loaded (#6753)
---
.../GlutenClickHouseMergeTreeWriteOnS3Suite.scala | 43 ++++++++++++++++++++++
cpp-ch/local-engine/local_engine_jni.cpp | 2 +
2 files changed, 45 insertions(+)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala
index c95b78858..6a473cc54 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteOnS3Suite.scala
@@ -764,5 +764,48 @@ class GlutenClickHouseMergeTreeWriteOnS3Suite
}
}
}
+
+ test("GLUTEN-6750: Optimize error if file metadata not exist") {
+ spark.sql(s"""
+ |DROP TABLE IF EXISTS lineitem_mergetree_bucket_s3;
+ |""".stripMargin)
+
+ spark.sql(s"""
+ |CREATE TABLE IF NOT EXISTS lineitem_mergetree_bucket_s3
+ |(
+ | l_orderkey bigint,
+ | l_partkey bigint,
+ | l_suppkey bigint,
+ | l_linenumber bigint,
+ | l_quantity double,
+ | l_extendedprice double,
+ | l_discount double,
+ | l_tax double,
+ | l_returnflag string,
+ | l_linestatus string,
+ | l_shipdate date,
+ | l_commitdate date,
+ | l_receiptdate date,
+ | l_shipinstruct string,
+ | l_shipmode string,
+ | l_comment string
+ |)
+ |USING clickhouse
+ |PARTITIONED BY (l_returnflag)
+ |CLUSTERED BY (l_orderkey)
+ |${if (sparkVersion.equals("3.2")) "" else "SORTED BY (l_partkey)"} INTO 4 BUCKETS
+ |LOCATION 's3a://$BUCKET_NAME/lineitem_mergetree_bucket_s3'
+ |TBLPROPERTIES (storage_policy='__s3_main')
+ |""".stripMargin)
+
+ spark.sql(s"""
+ | insert into table lineitem_mergetree_bucket_s3
+ | select /*+ REPARTITION(3) */ * from lineitem
+ |""".stripMargin)
+
+ FileUtils.deleteDirectory(new File(S3_METADATA_PATH))
+ spark.sql("optimize lineitem_mergetree_bucket_s3")
+ spark.sql("drop table lineitem_mergetree_bucket_s3")
+ }
}
// scalastyle:off line.size.limit
diff --git a/cpp-ch/local-engine/local_engine_jni.cpp b/cpp-ch/local-engine/local_engine_jni.cpp
index c4e8ec67b..db0dd8b62 100644
--- a/cpp-ch/local-engine/local_engine_jni.cpp
+++ b/cpp-ch/local-engine/local_engine_jni.cpp
@@ -995,6 +995,8 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn
// each task using its own CustomStorageMergeTree, don't reuse
auto temp_storage = local_engine::MergeTreeRelParser::copyToVirtualStorage(merge_tree_table, context);
+ // prefetch all needed parts metadata before merge
+ local_engine::restoreMetaData(temp_storage, merge_tree_table, *context);
local_engine::TempStorageFreer freer{temp_storage->getStorageID()}; // to release temp CustomStorageMergeTree with RAII
std::vector<DB::DataPartPtr> selected_parts = local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]