This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch data_col in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
commit d73a7f4b32035f0012849025dec36cf543d380a5 Author: Rui Mo <[email protected]> AuthorDate: Tue May 27 13:59:56 2025 +0800 Use column names --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 41 +++++++++++++++++++---------- ep/build-velox/src/get_velox.sh | 4 +-- 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index d4ec14d085..c8bf978e62 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -583,17 +583,19 @@ std::shared_ptr<connector::hive::HiveInsertTableHandle> makeHiveInsertTableHandl } if (std::find(partitionedBy.cbegin(), partitionedBy.cend(), tableColumnNames.at(i)) != partitionedBy.cend()) { ++numPartitionColumns; - columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back( + std::make_shared<connector::hive::HiveColumnHandle>( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kPartitionKey, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } else { - columnHandles.emplace_back(std::make_shared<connector::hive::HiveColumnHandle>( - tableColumnNames.at(i), - connector::hive::HiveColumnHandle::ColumnType::kRegular, - tableColumnTypes.at(i), - tableColumnTypes.at(i))); + columnHandles.emplace_back( + std::make_shared<connector::hive::HiveColumnHandle>( + tableColumnNames.at(i), + connector::hive::HiveColumnHandle::ColumnType::kRegular, + tableColumnTypes.at(i), + tableColumnTypes.at(i))); } } VELOX_CHECK_EQ(numPartitionColumns, partitionedBy.size()); @@ -1298,15 +1300,26 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: SubstraitParser::parseColumnTypes(baseSchema, columnTypes); } - // Velox requires Filter Pushdown must being enabled. - bool filterPushdownEnabled = true; + // Data columns are used as requested type in Velox. To support reading binary as string, requested type needs to be + // provided. To avoid the type check between element type and array type for unannotated array, we add this + // temporary workaround to only use requested type when the type includes VARCHAR. auto names = colNameList; auto types = veloxTypeList; + bool needsRequestedType = std::any_of( + veloxTypeList.begin(), veloxTypeList.end(), [](const auto& type) { return type->kind() == TypeKind::VARCHAR; }); auto dataColumns = ROW(std::move(names), std::move(types)); + + // Velox requires Filter Pushdown must being enabled. + bool filterPushdownEnabled = true; std::shared_ptr<connector::hive::HiveTableHandle> tableHandle; if (!readRel.has_filter()) { tableHandle = std::make_shared<connector::hive::HiveTableHandle>( - kHiveConnectorId, "hive_table", filterPushdownEnabled, common::SubfieldFilters{}, nullptr, dataColumns); + kHiveConnectorId, + "hive_table", + filterPushdownEnabled, + common::SubfieldFilters{}, + nullptr, + needsRequestedType ? dataColumns : nullptr); } else { common::SubfieldFilters subfieldFilters; auto remainingFilter = exprConverter_->toVeloxExpr(readRel.filter(), dataColumns); @@ -1317,7 +1330,7 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: filterPushdownEnabled, std::move(subfieldFilters), remainingFilter, - dataColumns); + needsRequestedType ? dataColumns : nullptr); } // Get assignments and out names. diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 44a8af58ba..49bf2d4ae9 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -16,8 +16,8 @@ set -exu -VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2025_05_29 +VELOX_REPO=https://github.com/rui-mo/velox.git +VELOX_BRANCH=test VELOX_HOME="" OS=`uname -s` --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
