This is an automated email from the ASF dual-hosted git repository.
Gabriel39 pushed a commit to branch refact_reader_branch
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/refact_reader_branch by this
push:
new 0fb11e4e0c3 cast for schema change (#63477)
0fb11e4e0c3 is described below
commit 0fb11e4e0c3751baeec63421d37cbec6bd7dd479
Author: Gabriel <[email protected]>
AuthorDate: Thu May 21 17:18:54 2026 +0800
cast for schema change (#63477)
---
be/src/exprs/vslot_ref.h | 2 +-
be/src/format/reader/column_mapper.cpp | 41 ++--
be/src/format/reader/column_mapper.h | 8 +-
be/src/format/reader/expr/cast.cpp | 131 +++++++++++++
.../vslot_ref.h => format/reader/expr/cast.h} | 60 ++----
be/src/format/reader/expr/slot_ref.h | 16 +-
be/src/format/reader/file_reader.h | 7 +
be/src/format/reader/table_reader.h | 10 +-
be/test/format/reader/expr/cast_test.cpp | 210 +++++++++++++++++++++
be/test/format/reader/expr/table_expr_test.cpp | 11 +-
10 files changed, 422 insertions(+), 74 deletions(-)
diff --git a/be/src/exprs/vslot_ref.h b/be/src/exprs/vslot_ref.h
index 3ac9f641c19..ceb702728eb 100644
--- a/be/src/exprs/vslot_ref.h
+++ b/be/src/exprs/vslot_ref.h
@@ -31,7 +31,7 @@ class TExprNode;
class Block;
class VExprContext;
-class VSlotRef MOCK_REMOVE(final) : public VExpr {
+class VSlotRef : public VExpr {
ENABLE_FACTORY_CREATOR(VSlotRef);
public:
diff --git a/be/src/format/reader/column_mapper.cpp
b/be/src/format/reader/column_mapper.cpp
index 7006365b054..7510413d07f 100644
--- a/be/src/format/reader/column_mapper.cpp
+++ b/be/src/format/reader/column_mapper.cpp
@@ -20,20 +20,20 @@
#include <vector>
#include "common/status.h"
-#include "expr/slot_ref.h"
+#include "format/reader/expr/cast.h"
+#include "format/reader/expr/slot_ref.h"
#include "format/reader/file_reader.h"
#include "format/reader/table_reader.h"
namespace doris::reader {
+static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id";
+static constexpr const char* ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER =
"_last_updated_sequence_number";
+
Status TableColumnMapper::create_mapping(const std::vector<TableColumn>&
projected_columns,
- std::vector<SchemaField> block_schema,
const std::map<std::string, Field>&
partition_values,
const std::vector<SchemaField>&
file_schema) {
- // 真实实现会做 field id/name matching、类型转换、复杂列 child mapping、缺失列
- // default/partition/generated 表达式构造。
_mappings.clear();
- block_schema.clear();
for (const auto& table_column : projected_columns) {
ColumnMapping mapping;
mapping.table_column_id = table_column.id;
@@ -43,24 +43,31 @@ Status TableColumnMapper::create_mapping(const
std::vector<TableColumn>& project
mapping.file_type = file_field->type;
mapping.is_trivial = _is_same_type(mapping.table_type,
mapping.file_type);
if (!mapping.is_trivial) {
- // TODO:
- return Status::NotSupported(
- "column mapping with type conversion is not supported
yet: table column "
- "'{}' (id={}, type={}) vs file column (id={},
type={})",
- table_column.name, mapping.table_column_id,
mapping.table_type->get_name(),
- mapping.file_column_id.value(),
mapping.file_type->get_name());
+ // 1. The data types differ (e.g. after schema evolution), so the
file column must be cast to the table type.
+ auto expr = Cast::create_shared(mapping.table_type);
+
expr->add_child(TableSlotRef::create_shared(mapping.file_column_id.value(),
+
mapping.file_column_id.value(), -1,
+ mapping.file_type,
file_field->name));
+ mapping.projection = VExprContext::create_shared(expr);
} else {
+ // 2. Data type matches, trivial mapping.
mapping.projection =
VExprContext::create_shared(TableSlotRef::create_shared(
- *mapping.file_column_id, block_schema.size(), -1,
mapping.table_type));
+ mapping.file_column_id.value(),
mapping.file_column_id.value(), -1,
+ mapping.file_type, file_field->name));
}
- block_schema.push_back(SchemaField {
- mapping.file_column_id.value(), table_column.name,
mapping.table_type, {}});
- } else if (table_column.default_expr != nullptr) {
- mapping.is_constant = true;
- mapping.default_expr = table_column.default_expr;
} else if (table_column.is_partition_key &&
partition_values.count(table_column.name) > 0) {
+ // 3. Partition column: use the partition value as a constant mapping.
Note that a partition column may also have a default expression, but the
partition value takes precedence when it exists.
mapping.default_expr =
VExprContext::create_shared(TableLiteral::create_shared(
mapping.table_type,
partition_values.at(table_column.name)));
+ } else if (table_column.default_expr != nullptr) {
+ // 4. The table column is absent from the file (e.g. it was added by schema
evolution) but has a default expression: use that as a constant mapping.
+ mapping.is_constant = true;
+ mapping.default_expr = table_column.default_expr;
+ } else if (table_column.name == ROW_LINEAGE_ROW_ID) {
+ // 5. Virtual column: use a special mapping to indicate that it should be
materialized by the table reader instead of being read from the file or
evaluated from an expression.
+ mapping.virtual_column_type = TableVirtualColumnType::ROW_ID;
+ } else if (table_column.name == ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER) {
+ mapping.virtual_column_type =
TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER;
} else {
if (table_column.is_partition_key) {
return Status::InvalidArgument(
diff --git a/be/src/format/reader/column_mapper.h
b/be/src/format/reader/column_mapper.h
index 4c6b510ff0e..d0d8076798b 100644
--- a/be/src/format/reader/column_mapper.h
+++ b/be/src/format/reader/column_mapper.h
@@ -40,6 +40,12 @@ enum class TableColumnMappingMode {
BY_NAME,
};
+enum TableVirtualColumnType {
+ INVALID = 0, // not a virtual column
+ ROW_ID = 1,
+ LAST_UPDATED_SEQUENCE_NUMBER = 2,
+};
+
// 单个 table column 到 file column 的映射结果。
// 这是 table 层和 file 层的核心边界对象。
struct ColumnMapping {
@@ -59,6 +65,7 @@ struct ColumnMapping {
std::vector<ColumnMapping> child_mappings;
bool is_trivial = false;
bool is_constant = false;
+ TableVirtualColumnType virtual_column_type =
TableVirtualColumnType::INVALID;
VExprContextSPtr default_expr;
};
@@ -81,7 +88,6 @@ public:
// 输出的 ColumnMapping 描述 table column 如何从 file column、常量列或表达式得到;
// 后续 projection、filter localization 和 table block finalize 都应复用这份映射。
virtual Status create_mapping(const std::vector<TableColumn>&
projected_columns,
- std::vector<SchemaField> block_schema,
const std::map<std::string, Field>&
partition_values,
const std::vector<SchemaField>& file_schema);
diff --git a/be/src/format/reader/expr/cast.cpp
b/be/src/format/reader/expr/cast.cpp
new file mode 100644
index 00000000000..69af83c9e77
--- /dev/null
+++ b/be/src/format/reader/expr/cast.cpp
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/reader/expr/cast.h"
+
+#include <fmt/format.h>
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+
+#include <ostream>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/block/columns_with_type_and_name.h"
+#include "exprs/function/simple_function_factory.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vliteral.h"
+
+namespace doris {
+
+Status Cast::prepare(RuntimeState* state, const RowDescriptor& desc,
VExprContext* context) {
+ RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+ if (_children.size() != 1) {
+ return Status::InternalError(
+ fmt::format("Cast should have exactly 1 child expr, but got
{}", _children.size()));
+ }
+ ColumnsWithTypeAndName argument_template;
+ argument_template.reserve(_children.size());
+ if (_children[0]->is_literal()) {
+ // Some functions need the literal column itself to derive the
return type.
+ auto literal_node = std::dynamic_pointer_cast<VLiteral>(_children[0]);
+ argument_template.emplace_back(literal_node->get_column_ptr(),
_children[0]->data_type(),
+ _children[0]->expr_name());
+ } else {
+ argument_template.emplace_back(nullptr, _children[0]->data_type(),
+ _children[0]->expr_name());
+ }
+
+ _expr_name = fmt::format("CAST(arguments={},return={})",
_children[0]->data_type()->get_name(),
+ _data_type->get_name());
+ // Look up the function; this does not prepare the function.
+ _function = SimpleFunctionFactory::instance().get_function(
+ "CAST", argument_template, _data_type,
+ {.new_version_unix_timestamp =
state->query_options().new_version_unix_timestamp},
+ state->be_exec_version());
+ if (_function == nullptr) {
+ return Status::InternalError("Could not find function {} ",
_expr_name);
+ }
+ VExpr::register_function_context(state, context);
+ _prepare_finished = true;
+ return Status::OK();
+}
+
+Status Cast::open(RuntimeState* state, VExprContext* context,
+ FunctionContext::FunctionStateScope scope) {
+ DCHECK(_prepare_finished);
+ for (auto& i : _children) {
+ RETURN_IF_ERROR(i->open(state, context, scope));
+ }
+ RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope,
_function));
+ if (scope == FunctionContext::FRAGMENT_LOCAL) {
+ RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
+ }
+ _open_finished = true;
+ return Status::OK();
+}
+
+void Cast::close(VExprContext* context, FunctionContext::FunctionStateScope
scope) {
+ VExpr::close_function_context(context, scope, _function);
+ VExpr::close(context, scope);
+}
+
+Status Cast::execute_column_impl(VExprContext* context, const Block* block,
+ const Selector* selector, size_t count,
+ ColumnPtr& result_column) const {
+ return _do_execute(context, block, selector, count, result_column);
+}
+
+std::string Cast::debug_string() const {
+ return _expr_name;
+}
+
+Status Cast::_do_execute(VExprContext* context, const Block* block, const
Selector* selector,
+ size_t count, ColumnPtr& result_column) const {
+ DCHECK(_open_finished || block == nullptr) << debug_string();
+ if (_children.size() != 1) {
+ return Status::InternalError(
+ fmt::format("Cast should have exactly 1 child expr, but got
{}", _children.size()));
+ }
+ if (is_const_and_have_executed()) { // const have executed in open function
+ result_column = get_result_from_const(count);
+ return Status::OK();
+ }
+
+ Block temp_block;
+ ColumnNumbers args(1);
+
+ ColumnPtr tmp_arg_column;
+ RETURN_IF_ERROR(_children[0]->execute_column(context, block, selector,
count, tmp_arg_column));
+ auto arg_type = _children[0]->execute_type(block);
+ temp_block.insert({tmp_arg_column, arg_type, _children[0]->expr_name()});
+ args[0] = 0;
+
+ uint32_t num_columns_without_result = temp_block.columns();
+ // prepare a column to save result
+ temp_block.insert({nullptr, _data_type, _expr_name});
+
+ RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index),
temp_block, args,
+ num_columns_without_result, count));
+ result_column =
temp_block.get_by_position(num_columns_without_result).column;
+ DCHECK_EQ(result_column->size(), count);
+ RETURN_IF_ERROR(result_column->column_self_check());
+ return Status::OK();
+}
+
+} // namespace doris
diff --git a/be/src/exprs/vslot_ref.h b/be/src/format/reader/expr/cast.h
similarity index 50%
copy from be/src/exprs/vslot_ref.h
copy to be/src/format/reader/expr/cast.h
index 3ac9f641c19..7d8ca437ba3 100644
--- a/be/src/exprs/vslot_ref.h
+++ b/be/src/format/reader/expr/cast.h
@@ -16,72 +16,44 @@
// under the License.
#pragma once
+
#include <string>
#include "common/object_pool.h"
#include "common/status.h"
+#include "exprs/function_context.h"
#include "exprs/vexpr.h"
namespace doris {
-class SlotDescriptor;
class RowDescriptor;
class RuntimeState;
class TExprNode;
-
class Block;
class VExprContext;
+} // namespace doris
+
+namespace doris {
-class VSlotRef MOCK_REMOVE(final) : public VExpr {
- ENABLE_FACTORY_CREATOR(VSlotRef);
+class Cast final : public VExpr {
+ ENABLE_FACTORY_CREATOR(Cast);
public:
- VSlotRef(const TExprNode& node);
- VSlotRef(const SlotDescriptor* desc);
-#ifdef BE_TEST
- VSlotRef() = default;
- void set_column_id(int column_id) { _column_id = column_id; }
- void set_slot_id(int slot_id) { _slot_id = slot_id; }
-#endif
+ Cast(const DataTypePtr& type) { _data_type = type; }
+ ~Cast() override = default;
Status prepare(RuntimeState* state, const RowDescriptor& desc,
VExprContext* context) override;
Status open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) override;
- Status execute(VExprContext* context, Block* block, int* result_column_id)
const override;
+ void close(VExprContext* context, FunctionContext::FunctionStateScope
scope) override;
Status execute_column_impl(VExprContext* context, const Block* block,
const Selector* selector,
size_t count, ColumnPtr& result_column) const
override;
- DataTypePtr execute_type(const Block* block) const override;
-
- const std::string& expr_name() const override;
- std::string expr_label() override;
std::string debug_string() const override;
- bool is_constant() const override { return false; }
-
- int column_id() const { return _column_id; }
-
- MOCK_FUNCTION int slot_id() const { return _slot_id; }
-
- bool equals(const VExpr& other) override;
-
- size_t estimate_memory(const size_t rows) override { return 0; }
-
- void collect_slot_column_ids(std::set<int>& column_ids) const override {
- column_ids.insert(_column_id);
- }
-
- MOCK_FUNCTION const std::string& column_name() const { return
*_column_name; }
-
- uint64_t get_digest(uint64_t seed) const override;
-
- double execute_cost() const override { return 0.0; }
-
-protected:
- VSlotRef(int slot_id, int column_id, int column_uniq_id)
- : _slot_id(slot_id), _column_id(column_id),
_column_uniq_id(column_uniq_id) {}
+ uint64_t get_digest(uint64_t seed) const override { return 0; }
+ const std::string& expr_name() const override { return _expr_name; }
private:
- int _slot_id;
- int _column_id;
- int _column_uniq_id = -1;
- const std::string* _column_name = nullptr;
- const std::string _column_label;
+ Status _do_execute(VExprContext* context, const Block* block, const
Selector* selector,
+ size_t count, ColumnPtr& result_column) const;
+ std::string _expr_name;
+ FunctionBasePtr _function;
};
} // namespace doris
diff --git a/be/src/format/reader/expr/slot_ref.h
b/be/src/format/reader/expr/slot_ref.h
index 6b5d027602e..fd4782a1bdd 100644
--- a/be/src/format/reader/expr/slot_ref.h
+++ b/be/src/format/reader/expr/slot_ref.h
@@ -26,14 +26,26 @@ class TableSlotRef : public VSlotRef {
ENABLE_FACTORY_CREATOR(TableSlotRef);
public:
- TableSlotRef(int slot_id, int column_id, int column_uniq_id, const
DataTypePtr& type)
- : VSlotRef(slot_id, column_id, column_uniq_id) {
+ TableSlotRef(int slot_id, int column_id, int column_uniq_id, const
DataTypePtr& type,
+ const std::string& column_name)
+ : VSlotRef(slot_id, column_id, column_uniq_id),
_cname(column_name) {
_data_type = type;
}
Status prepare(RuntimeState* state, const RowDescriptor& desc,
VExprContext* context) override {
+ if (_prepared) {
+ return Status::OK();
+ }
+ _prepared = true;
+ _prepare_finished = true;
return Status::OK();
}
+
+ const std::string& expr_name() const override { return _cname; }
+ const std::string& column_name() const override { return _cname; }
+
+private:
+ const std::string _cname;
};
} // namespace doris
diff --git a/be/src/format/reader/file_reader.h
b/be/src/format/reader/file_reader.h
index 6dfbb4a8420..96ace67d8de 100644
--- a/be/src/format/reader/file_reader.h
+++ b/be/src/format/reader/file_reader.h
@@ -42,6 +42,12 @@ namespace doris::reader {
using ColumnId = int32_t;
+enum ColumnType {
+ DATA_COLUMN = 0, // normal data column
+ ROW_NUMBER = 1, // row number in a file
+ FILE_NAME = 2, // file name
+};
+
// 文件本地 schema 字段。
// 这是 FileReader 暴露给 table 层的 file-local schema 视图,不携带 table/global
// schema 语义。Iceberg field id、name mapping、default/generated/partition 列都不在
@@ -51,6 +57,7 @@ struct SchemaField {
std::string name;
DataTypePtr type;
std::vector<SchemaField> children;
+ ColumnType column_type = ColumnType::DATA_COLUMN;
};
// 已经 localize 到文件 schema 的过滤条件。
diff --git a/be/src/format/reader/table_reader.h
b/be/src/format/reader/table_reader.h
index d14e1e78261..c3744427aa0 100644
--- a/be/src/format/reader/table_reader.h
+++ b/be/src/format/reader/table_reader.h
@@ -232,11 +232,10 @@ protected:
// 打开当前具体 reader。
// 子类在这里基于当前 split/task 初始化底层 FileReader。
virtual Status open_reader() {
- std::vector<SchemaField> file_schema;
- RETURN_IF_ERROR(_data_reader.reader->get_schema(&file_schema));
-
RETURN_IF_ERROR(_data_reader.column_mapper.create_mapping(_options.projected_columns,
-
_data_reader.block_schema,
-
_partition_values, file_schema));
+ _data_reader.block_schema.clear();
+
RETURN_IF_ERROR(_data_reader.reader->get_schema(&_data_reader.block_schema));
+ RETURN_IF_ERROR(_data_reader.column_mapper.create_mapping(
+ _options.projected_columns, _partition_values,
_data_reader.block_schema));
FileScanRequest file_request;
RETURN_IF_ERROR(_data_reader.column_mapper.create_scan_request(
@@ -270,7 +269,6 @@ protected:
struct DataReader {
std::unique_ptr<FileReader> reader;
TableColumnMapper column_mapper;
- // Schema of blocks from file reader.
std::vector<SchemaField> block_schema;
};
DataReader _data_reader;
diff --git a/be/test/format/reader/expr/cast_test.cpp
b/be/test/format/reader/expr/cast_test.cpp
new file mode 100644
index 00000000000..4f215418953
--- /dev/null
+++ b/be/test/format/reader/expr/cast_test.cpp
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/reader/expr/cast.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exprs/vexpr_context.h"
+#include "format/reader/column_mapper.h"
+#include "format/reader/expr/literal.h"
+#include "format/reader/expr/slot_ref.h"
+#include "format/reader/file_reader.h"
+#include "format/reader/table_reader.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris {
+
+class CastTest : public testing::Test {
+protected:
+ void SetUp() override { state.set_enable_strict_cast(true); }
+
+ static VExprContextSPtr create_context(const DataTypePtr& return_type,
+ const DataTypePtr& child_type, int
child_column_id = 0) {
+ auto cast = Cast::create_shared(return_type);
+ cast->add_child(TableSlotRef::create_shared(child_column_id,
child_column_id, -1,
+ child_type,
"source_column"));
+ return VExprContext::create_shared(cast);
+ }
+
+ Status prepare_open_execute(VExprContext* context, Block* block, int*
result_column_id) {
+ RETURN_IF_ERROR(context->prepare(&state, RowDescriptor()));
+ RETURN_IF_ERROR(context->open(&state));
+ return context->execute(block, result_column_id);
+ }
+
+ MockRuntimeState state;
+};
+
+TEST_F(CastTest, CastIntSlotToBigInt) {
+ auto source_type = std::make_shared<DataTypeInt32>();
+ auto return_type = std::make_shared<DataTypeInt64>();
+ auto context = create_context(return_type, source_type);
+ Block block;
+ block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, -2,
3}));
+
+ int result_column_id = -1;
+ auto status = prepare_open_execute(context.get(), &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ ASSERT_EQ(result_column_id, 1);
+ ASSERT_EQ(block.columns(), 2);
+ EXPECT_EQ(block.get_by_position(result_column_id).type, return_type);
+ const auto& result_column =
+ assert_cast<const
ColumnInt64&>(*block.get_by_position(result_column_id).column);
+ EXPECT_EQ(result_column.get_data()[0], 1);
+ EXPECT_EQ(result_column.get_data()[1], -2);
+ EXPECT_EQ(result_column.get_data()[2], 3);
+
+ context->close();
+}
+
+TEST_F(CastTest, CastStringSlotToNullableInt) {
+ state.set_enable_strict_cast(false);
+ auto source_type = std::make_shared<DataTypeString>();
+ auto return_type =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+ auto context = create_context(return_type, source_type);
+ Block block;
+ block.insert(ColumnHelper::create_column_with_name<DataTypeString>({"10",
"bad", "-3"}));
+
+ int result_column_id = -1;
+ auto status = prepare_open_execute(context.get(), &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ const auto& nullable_column =
+ assert_cast<const
ColumnNullable&>(*block.get_by_position(result_column_id).column);
+ const auto& result_column =
+ assert_cast<const
ColumnInt32&>(nullable_column.get_nested_column());
+ const auto& null_map = nullable_column.get_null_map_data();
+ EXPECT_EQ(result_column.get_data()[0], 10);
+ EXPECT_EQ(result_column.get_data()[2], -3);
+ EXPECT_EQ(null_map[0], 0);
+ EXPECT_EQ(null_map[1], 1);
+ EXPECT_EQ(null_map[2], 0);
+
+ context->close();
+}
+
+TEST_F(CastTest, CastLiteralToString) {
+ auto source_type = std::make_shared<DataTypeInt32>();
+ auto return_type = std::make_shared<DataTypeString>();
+ auto cast = Cast::create_shared(return_type);
+ cast->add_child(TableLiteral::create_shared(source_type,
Field::create_field<TYPE_INT>(123)));
+ auto context = VExprContext::create_shared(cast);
+ Block block;
+ block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, 2,
3}));
+
+ int result_column_id = -1;
+ auto status = prepare_open_execute(context.get(), &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ const auto& result = block.get_by_position(result_column_id);
+ EXPECT_EQ(result.type->to_string(*result.column, 0), "123");
+ EXPECT_EQ(result.type->to_string(*result.column, 1), "123");
+ EXPECT_EQ(result.type->to_string(*result.column, 2), "123");
+
+ context->close();
+}
+
+TEST_F(CastTest, EmptyBlockAppendsEmptyResultColumn) {
+ auto source_type = std::make_shared<DataTypeInt32>();
+ auto return_type = std::make_shared<DataTypeInt64>();
+ auto context = create_context(return_type, source_type);
+ Block block;
+ block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({}));
+
+ int result_column_id = -1;
+ auto status = prepare_open_execute(context.get(), &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ ASSERT_EQ(result_column_id, 1);
+ EXPECT_EQ(block.get_by_position(result_column_id).column->size(), 0);
+
+ context->close();
+}
+
+TEST_F(CastTest, PrepareRejectsMissingChild) {
+ auto cast = Cast::create_shared(std::make_shared<DataTypeInt64>());
+ VExprContext context(cast);
+
+ auto status = context.prepare(&state, RowDescriptor());
+ ASSERT_FALSE(status.ok());
+ EXPECT_NE(status.to_string().find("exactly 1 child expr"),
std::string::npos);
+}
+
+TEST_F(CastTest, PrepareRejectsMultipleChildren) {
+ auto child_type = std::make_shared<DataTypeInt32>();
+ auto cast = Cast::create_shared(std::make_shared<DataTypeInt64>());
+ cast->add_child(TableSlotRef::create_shared(0, 0, -1, child_type, "c0"));
+ cast->add_child(TableSlotRef::create_shared(1, 1, -1, child_type, "c1"));
+ VExprContext context(cast);
+
+ auto status = context.prepare(&state, RowDescriptor());
+ ASSERT_FALSE(status.ok());
+ EXPECT_NE(status.to_string().find("exactly 1 child expr"),
std::string::npos);
+}
+
+TEST_F(CastTest, ColumnMapperBuildsCastProjectionForTypeMismatch) {
+ reader::TableColumnMapper mapper;
+ reader::TableColumn table_column;
+ table_column.id = 7;
+ table_column.name = "value";
+ table_column.type = std::make_shared<DataTypeInt64>();
+ std::vector<reader::TableColumn> projected_columns {table_column};
+
+ reader::SchemaField file_field;
+ file_field.id = 0;
+ file_field.name = "value";
+ file_field.type = std::make_shared<DataTypeInt32>();
+ std::vector<reader::SchemaField> file_schema {file_field};
+
+ auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+ ASSERT_TRUE(status.ok()) << status;
+ ASSERT_EQ(mapper.mappings().size(), 1);
+ const auto& mapping = mapper.mappings()[0];
+ EXPECT_FALSE(mapping.is_trivial);
+ ASSERT_NE(mapping.projection, nullptr);
+
+ Block block;
+ block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11,
22}));
+ int result_column_id = -1;
+ status = prepare_open_execute(mapping.projection.get(), &block,
&result_column_id);
+ ASSERT_TRUE(status.ok()) << status;
+
+ const auto& result_column =
+ assert_cast<const
ColumnInt64&>(*block.get_by_position(result_column_id).column);
+ EXPECT_EQ(result_column.get_data()[0], 11);
+ EXPECT_EQ(result_column.get_data()[1], 22);
+
+ mapping.projection->close();
+}
+
+} // namespace doris
diff --git a/be/test/format/reader/expr/table_expr_test.cpp
b/be/test/format/reader/expr/table_expr_test.cpp
index dd831071483..3caca73c6c5 100644
--- a/be/test/format/reader/expr/table_expr_test.cpp
+++ b/be/test/format/reader/expr/table_expr_test.cpp
@@ -79,11 +79,14 @@ TEST(TableLiteralTest, ExecuteAppendsConstColumnToBlock) {
TEST(TableSlotRefTest, KeepsSlotColumnIdsAndType) {
auto type = std::make_shared<DataTypeInt32>();
- auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type);
+ std::string name = "file_col";
+ auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type, name);
EXPECT_EQ(slot_ref->slot_id(), 10);
EXPECT_EQ(slot_ref->column_id(), 20);
EXPECT_EQ(slot_ref->data_type(), type);
+ EXPECT_EQ(slot_ref->expr_name(), "file_col");
+ EXPECT_EQ(slot_ref->column_name(), "file_col");
EXPECT_FALSE(slot_ref->is_constant());
std::set<int> column_ids;
@@ -94,14 +97,16 @@ TEST(TableSlotRefTest, KeepsSlotColumnIdsAndType) {
TEST(TableSlotRefTest, PrepareDoesNotRequireRowDescriptor) {
auto type = std::make_shared<DataTypeInt32>();
- auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type);
+ std::string name = "";
+ auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type, name);
EXPECT_TRUE(slot_ref->prepare(nullptr, RowDescriptor(), nullptr).ok());
}
TEST(TableSlotRefTest, ExecuteReturnsReferencedColumnId) {
auto type = std::make_shared<DataTypeInt32>();
- auto slot_ref = TableSlotRef::create_shared(10, 1, 30, type);
+ std::string name = "";
+ auto slot_ref = TableSlotRef::create_shared(10, 1, 30, type, name);
Block block;
block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, 2,
3}));
block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({4, 5,
6}));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]