This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 26645e9da84 [fix] remove useless const_cast and explain const_cast for vec (2) (#56464)
26645e9da84 is described below
commit 26645e9da84abdbcbee3cf5b48a195587cdd8dde
Author: admiring_xm <[email protected]>
AuthorDate: Mon Oct 20 10:49:34 2025 +0800
[fix] remove useless const_cast and explain const_cast for vec (2) (#56464)
### What problem does this PR solve?

Go through the whole be/ directory and audit every const_cast.

Issue Number: #55057

Problem Summary:
1. Remove useless const_cast.
2. Explain why the remaining const_cast usages do not result in undefined behavior.
3. Leave some const_cast unchanged:
(1) code inside DBUG_EXECUTE_IF or in test files
(2) underlying data structures, such as COW
(3) const_cast<const T*>
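
For reference, the rule this audit applies: const_cast itself never has
undefined behavior; writing through the cast result is UB only when the
referenced object was originally defined const. A minimal standalone
sketch of that rule (illustration only, not code from this patch):

    #include <cassert>

    // Writing through a const_cast'ed pointer is legal iff the
    // underlying object is not itself const.
    void bump(const int* p) { ++*const_cast<int*>(p); }

    int main() {
        int mutable_obj = 1;
        bump(&mutable_obj); // OK: the object was defined non-const
        assert(mutable_obj == 2);
        // const int frozen = 1;
        // bump(&frozen);   // undefined behavior: object defined const
        return 0;
    }

As the comments added in this patch note, this is why casts such as
const_cast<IColumn*>(block->get_by_position(...).column.get()) are kept:
the Block owns mutable columns and only hands them out behind the
pointer-to-const ColumnPtr (COW) handle.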
---
be/src/olap/push_handler.cpp | 9 +++---
be/src/vec/exec/format/column_type_convert.cpp | 3 +-
be/src/vec/exec/format/csv/csv_reader.cpp | 9 ++++--
be/src/vec/exec/format/orc/vorc_reader.cpp | 16 ++++++----
be/src/vec/exec/format/orc/vorc_reader.h | 10 +++----
.../format/parquet/byte_stream_split_decoder.cpp | 2 ++
.../format/parquet/fix_length_dict_decoder.hpp | 2 ++
.../exec/format/parquet/fix_length_plain_decoder.h | 2 ++
.../exec/format/parquet/parquet_column_convert.cpp | 3 +-
.../exec/format/parquet/parquet_column_convert.h | 8 ++---
be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 2 +-
be/src/vec/exec/format/parquet/schema_desc.cpp | 2 +-
be/src/vec/exec/format/parquet/schema_desc.h | 4 +--
.../parquet/vparquet_column_chunk_reader.cpp | 2 +-
.../format/parquet/vparquet_column_chunk_reader.h | 2 +-
.../exec/format/parquet/vparquet_column_reader.cpp | 35 ++++++++++++----------
.../exec/format/parquet/vparquet_column_reader.h | 24 +++++++--------
.../exec/format/parquet/vparquet_file_metadata.h | 1 +
.../exec/format/parquet/vparquet_group_reader.cpp | 7 +++--
.../exec/format/parquet/vparquet_group_reader.h | 4 +--
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 16 +++++-----
be/src/vec/exec/format/parquet/vparquet_reader.h | 4 +--
be/src/vec/exec/format/table/iceberg_reader.cpp | 14 ++++-----
be/src/vec/exec/format/table/iceberg_reader.h | 3 +-
be/src/vec/exec/jni_connector.cpp | 28 ++++++++---------
be/src/vec/exec/jni_connector.h | 8 ++---
be/src/vec/exec/scan/file_scanner.cpp | 8 ++---
27 files changed, 123 insertions(+), 105 deletions(-)
diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp
index 07586acc5a2..677531d8df7 100644
--- a/be/src/olap/push_handler.cpp
+++ b/be/src/olap/push_handler.cpp
@@ -655,11 +655,10 @@ Status PushBrokerReader::_get_next_reader() {
switch (_file_params.format_type) {
case TFileFormatType::FORMAT_PARQUET: {
std::unique_ptr<vectorized::ParquetReader> parquet_reader =
-                vectorized::ParquetReader::create_unique(
-                        _runtime_profile, _file_params, range,
-                        _runtime_state->query_options().batch_size,
-                        const_cast<cctz::time_zone*>(&_runtime_state->timezone_obj()),
-                        _io_ctx.get(), _runtime_state.get());
+                vectorized::ParquetReader::create_unique(_runtime_profile, _file_params, range,
+                                                         _runtime_state->query_options().batch_size,
+                                                         &_runtime_state->timezone_obj(),
+                                                         _io_ctx.get(), _runtime_state.get());
         init_status = parquet_reader->init_reader(
                 _all_col_names, _colname_to_value_range, _push_down_exprs, _real_tuple_desc,
diff --git a/be/src/vec/exec/format/column_type_convert.cpp b/be/src/vec/exec/format/column_type_convert.cpp
index d0cc2ba7342..4415faf860b 100644
--- a/be/src/vec/exec/format/column_type_convert.cpp
+++ b/be/src/vec/exec/format/column_type_convert.cpp
@@ -123,8 +123,7 @@ ColumnPtr ColumnTypeConverter::get_column(const DataTypePtr& src_type, ColumnPtr
         // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will
         // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`.
         // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`.
-        auto* doris_nullable_column =
-                const_cast<ColumnNullable*>(static_cast<const ColumnNullable*>(dst_column.get()));
+        auto* doris_nullable_column = static_cast<const ColumnNullable*>(dst_column.get());
         return ColumnNullable::create(_cached_src_column,
                                       doris_nullable_column->get_null_map_column_ptr());
     }
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index a8c8f2fe15e..3e7fca5af99 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -301,6 +301,7 @@ Status CsvReader::init_reader(bool is_load) {
return Status::OK();
}
+// !FIXME: Here we should use MutableBlock
Status CsvReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
if (_line_reader_eof) {
*eof = true;
@@ -643,6 +644,8 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block,
         IColumn* col_ptr = columns[i].get();
         if (!_is_load) {
+            // block is a Block*, and get_by_position returns a ColumnPtr,
+            // which is a pointer to const. Therefore, using const_cast is permissible.
             col_ptr = const_cast<IColumn*>(
                     block->get_by_position(_file_slot_idx_map[i]).column.get());
         }
@@ -666,6 +669,8 @@ Status CsvReader::_fill_empty_line(Block* block, std::vector<MutableColumnPtr>&
     for (int i = 0; i < _file_slot_descs.size(); ++i) {
         IColumn* col_ptr = columns[i].get();
         if (!_is_load) {
+            // block is a Block*, and get_by_position returns a ColumnPtr,
+            // which is a pointer to const. Therefore, using const_cast is permissible.
             col_ptr = const_cast<IColumn*>(
                     block->get_by_position(_file_slot_idx_map[i]).column.get());
         }
@@ -754,7 +759,7 @@ Status CsvReader::_parse_col_nums(size_t* col_nums) {
return Status::InternalError<false>(
"The first line is empty, can not parse column numbers");
}
-    if (!validate_utf8(_params, const_cast<char*>(reinterpret_cast<const char*>(ptr)), size)) {
+    if (!validate_utf8(_params, reinterpret_cast<const char*>(ptr), size)) {
         return Status::InternalError<false>("Only support csv data in utf8 codec");
     }
ptr = _remove_bom(ptr, size);
@@ -771,7 +776,7 @@ Status CsvReader::_parse_col_names(std::vector<std::string>* col_names) {
     if (size == 0) {
         return Status::InternalError<false>("The first line is empty, can not parse column names");
     }
-    if (!validate_utf8(_params, const_cast<char*>(reinterpret_cast<const char*>(ptr)), size)) {
+    if (!validate_utf8(_params, reinterpret_cast<const char*>(ptr), size)) {
         return Status::InternalError<false>("Only support csv data in utf8 codec");
     }
ptr = _remove_bom(ptr, size);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index ce650534cd1..bd996f2189b 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1285,6 +1285,8 @@ Status OrcReader::_fill_partition_columns(
DataTypeSerDe::FormatOptions _text_formatOptions;
for (const auto& kv : partition_columns) {
auto doris_column = block->get_by_name(kv.first).column;
+        // block is a Block*, and get_by_name returns a ColumnPtr,
+        // which is a pointer to const. Therefore, using const_cast is permissible.
auto* col_ptr = const_cast<IColumn*>(doris_column.get());
const auto& [value, slot_desc] = kv.second;
auto text_serde = slot_desc->get_data_type_ptr()->get_serde();
@@ -1654,6 +1656,8 @@ Status OrcReader::_fill_doris_array_offsets(const std::string& col_name,
                                             size_t num_values, size_t* element_size) {
     SCOPED_RAW_TIMER(&_statistics.decode_value_time);
     if (num_values > 0) {
+        // The const object only calls a non-const method of a third-party type
+        // that does not modify it, so const_cast can be used.
         if (const_cast<orc::DataBuffer<int64_t>&>(orc_offsets).size() < num_values + 1) {
             return Status::InternalError("Wrong array offsets in orc file for column '{}'",
                                          col_name);
@@ -1722,9 +1726,9 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name,
         size_t element_size = 0;
         RETURN_IF_ERROR(_fill_doris_array_offsets(col_name, doris_offsets, orc_offsets, num_values,
                                                   &element_size));
-        DataTypePtr& nested_type = const_cast<DataTypePtr&>(
+        const DataTypePtr& nested_type =
                 reinterpret_cast<const DataTypeArray*>(remove_nullable(data_type).get())
-                        ->get_nested_type());
+                        ->get_nested_type();
         const orc::Type* nested_orc_type = orc_column_type->getSubtype(0);
         std::string element_name = col_name + ".element";
         return _orc_column_to_doris_column<false>(
@@ -1742,12 +1746,12 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name,
         size_t element_size = 0;
         RETURN_IF_ERROR(_fill_doris_array_offsets(col_name, doris_map.get_offsets(),
                                                   orc_map->offsets, num_values, &element_size));
-        DataTypePtr& doris_key_type = const_cast<DataTypePtr&>(
+        const DataTypePtr& doris_key_type =
                 reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())
-                        ->get_key_type());
+                        ->get_key_type();
-        DataTypePtr& doris_value_type = const_cast<DataTypePtr&>(
+        const DataTypePtr& doris_value_type =
                 reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())
-                        ->get_value_type());
+                        ->get_value_type();
const orc::Type* orc_key_type = orc_column_type->getSubtype(0);
const orc::Type* orc_value_type = orc_column_type->getSubtype(1);
ColumnPtr& doris_key_column = doris_map.get_keys_ptr();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index 9067323215e..358ba1d2468 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -380,8 +380,8 @@ private:
if (scale_params.scale_type != DecimalScaleParams::NOT_INIT) {
return;
}
-        auto* decimal_type = reinterpret_cast<DataTypeDecimal<DecimalPrimitiveType>*>(
-                const_cast<IDataType*>(remove_nullable(data_type).get()));
+        auto* decimal_type = reinterpret_cast<const DataTypeDecimal<DecimalPrimitiveType>*>(
+                remove_nullable(data_type).get());
auto dest_scale = decimal_type->get_scale();
if (dest_scale > orc_decimal_scale) {
scale_params.scale_type = DecimalScaleParams::SCALE_UP;
@@ -431,7 +431,7 @@ private:
             if constexpr (std::is_same_v<OrcColumnType, orc::Decimal64VectorBatch>) {
                 value = static_cast<int128_t>(cvb_data[i]);
             } else {
-                // cast data to non const
+                // cast data to non const, to call a third-party method that is not marked const
auto* non_const_data = const_cast<OrcColumnType*>(data);
uint64_t hi = non_const_data->values[i].getHighBits();
uint64_t lo = non_const_data->values[i].getLowBits();
@@ -447,7 +447,7 @@ private:
             if constexpr (std::is_same_v<OrcColumnType, orc::Decimal64VectorBatch>) {
                 value = static_cast<int128_t>(cvb_data[i]);
             } else {
-                // cast data to non const
+                // cast data to non const, to call a third-party method that is not marked const
auto* non_const_data = const_cast<OrcColumnType*>(data);
uint64_t hi = non_const_data->values[i].getHighBits();
uint64_t lo = non_const_data->values[i].getLowBits();
@@ -463,7 +463,7 @@ private:
             if constexpr (std::is_same_v<OrcColumnType, orc::Decimal64VectorBatch>) {
                 value = static_cast<int128_t>(cvb_data[i]);
             } else {
-                // cast data to non const
+                // cast data to non const, to call a third-party method that is not marked const
auto* non_const_data = const_cast<OrcColumnType*>(data);
uint64_t hi = non_const_data->values[i].getHighBits();
uint64_t lo = non_const_data->values[i].getLowBits();
diff --git a/be/src/vec/exec/format/parquet/byte_stream_split_decoder.cpp b/be/src/vec/exec/format/parquet/byte_stream_split_decoder.cpp
index 051e33ebec4..931198881af 100644
--- a/be/src/vec/exec/format/parquet/byte_stream_split_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/byte_stream_split_decoder.cpp
@@ -51,6 +51,8 @@ Status ByteStreamSplitDecoder::_decode_values(MutableColumnPtr& doris_column,
     size_t scale_size = (select_vector.num_values() - select_vector.num_filtered()) *
                         (_type_length / primitive_length);
doris_column->resize(doris_column->size() + scale_size);
+ // doris_column is of type MutableColumnPtr, which uses get_raw_data
+ // to return a StringRef, hence the use of const_cast.
char* raw_data = const_cast<char*>(doris_column->get_raw_data().data);
ColumnSelectVector::DataReadType read_type;
DCHECK(_data->get_size() % _type_length == 0);
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index b997c10da51..c932c15b30f 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -73,6 +73,8 @@ protected:
         size_t scale_size = (select_vector.num_values() - select_vector.num_filtered()) *
                             (_type_length / primitive_length);
doris_column->resize(doris_column->size() + scale_size);
+ // doris_column is of type MutableColumnPtr, which uses get_raw_data
+ // to return a StringRef, hence the use of const_cast.
char* raw_data = const_cast<char*>(doris_column->get_raw_data().data);
size_t dict_index = 0;
ColumnSelectVector::DataReadType read_type;
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
index c8f372fb43d..bd0e4e94b14 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
@@ -52,6 +52,8 @@ public:
         size_t scale_size = (select_vector.num_values() - select_vector.num_filtered()) *
                             (_type_length / primitive_length);
doris_column->resize(doris_column->size() + scale_size);
+ // doris_column is of type MutableColumnPtr, which uses get_raw_data
+ // to return a StringRef, hence the use of const_cast.
char* raw_data = const_cast<char*>(doris_column->get_raw_data().data);
ColumnSelectVector::DataReadType read_type;
     while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 23e0253f45e..e1405d9a885 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -122,8 +122,7 @@ ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s
     // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will
     // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`.
     // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`.
-    auto* doris_nullable_column = const_cast<ColumnNullable*>(
-            assert_cast<const ColumnNullable*>(dst_logical_column.get()));
+    auto* doris_nullable_column = assert_cast<const ColumnNullable*>(dst_logical_column.get());
     return ColumnNullable::create(_cached_src_physical_column,
                                   doris_nullable_column->get_null_map_column_ptr());
}
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 18c79bb15f9..696b1482216 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -64,7 +64,7 @@ struct ConvertParams {
             // The missing parquet metadata makes it impossible for us to know the time zone information,
             // so we default to UTC here.
if (ctz == nullptr) {
- ctz = const_cast<cctz::time_zone*>(&utc0);
+ ctz = &utc0;
}
}
}
@@ -84,7 +84,7 @@ struct ConvertParams {
// When a timestamp is stored as `1970-01-03 12:00:00`,
             // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00`
             // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0
- ctz = const_cast<cctz::time_zone*>(&utc0);
+ ctz = &utc0;
}
const auto& time_unit = timestamp_info.unit;
if (time_unit.__isset.MILLIS) {
@@ -182,8 +182,8 @@ public:
ColumnPtr src_logical_column;
if (is_consistent()) {
if (dst_logical_type->is_nullable()) {
-                auto doris_nullable_column = const_cast<ColumnNullable*>(
-                        assert_cast<const ColumnNullable*>(dst_logical_col.get()));
+                auto doris_nullable_column =
+                        assert_cast<const ColumnNullable*>(dst_logical_col.get());
                 src_logical_column =
                         ColumnNullable::create(_cached_src_physical_column,
                                                doris_nullable_column->get_null_map_column_ptr());
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index 64dd7b7ce27..3126616d3e3 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -398,7 +398,7 @@ public:
     static bool check_can_filter(OP op, const std::vector<Field>& literal_values,
                                  const ColumnStat& column_stat, const FieldSchema* col_schema,
-                                 cctz::time_zone* ctz) {
+                                 const cctz::time_zone* ctz) {
         Field min_field;
         Field max_field;
         if (!ParquetPredicate::get_min_max_value(col_schema, column_stat.encoded_min_value,
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp
index 9cb194a70ba..82e6e0c9d61 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -582,7 +582,7 @@ int FieldDescriptor::get_column_index(const std::string& column) const {
return -1;
}
-const FieldSchema* FieldDescriptor::get_column(const std::string& name) const {
+FieldSchema* FieldDescriptor::get_column(const std::string& name) const {
auto it = _name_to_field.find(name);
if (it != _name_to_field.end()) {
return it->second;
diff --git a/be/src/vec/exec/format/parquet/schema_desc.h b/be/src/vec/exec/format/parquet/schema_desc.h
index 0430edfcfe6..09a52a4251a 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.h
+++ b/be/src/vec/exec/format/parquet/schema_desc.h
@@ -71,7 +71,7 @@ private:
// The leaf node of schema elements
std::vector<FieldSchema*> _physical_fields;
// Name to _fields, not all schema elements
- std::unordered_map<std::string, const FieldSchema*> _name_to_field;
+ std::unordered_map<std::string, FieldSchema*> _name_to_field;
     // Used in from_thrift, marking the next schema position that should be parsed
size_t _next_schema_pos;
@@ -124,7 +124,7 @@ public:
* @param name Column name
* @return FieldSchema or nullptr if not exists
*/
- const FieldSchema* get_column(const std::string& name) const;
+ FieldSchema* get_column(const std::string& name) const;
void get_column_names(std::unordered_set<std::string>* names) const;
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
index 5bcf4abf919..4484e2142ee 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp
@@ -50,7 +50,7 @@ namespace doris::vectorized {
 ColumnChunkReader::ColumnChunkReader(io::BufferedStreamReader* reader,
                                      tparquet::ColumnChunk* column_chunk,
                                      FieldSchema* field_schema,
                                      const tparquet::OffsetIndex* offset_index,
-                                     cctz::time_zone* ctz, io::IOContext* io_ctx)
+                                     const cctz::time_zone* ctz, io::IOContext* io_ctx)
: _field_schema(field_schema),
_max_rep_level(field_schema->repetition_level),
_max_def_level(field_schema->definition_level),
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h
index db0530da597..93be18aa002 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h
@@ -89,7 +89,7 @@ public:
     ColumnChunkReader(io::BufferedStreamReader* reader, tparquet::ColumnChunk* column_chunk,
                       FieldSchema* field_schema, const tparquet::OffsetIndex* offset_index,
-                      cctz::time_zone* ctz, io::IOContext* io_ctx);
+                      const cctz::time_zone* ctz, io::IOContext* io_ctx);
~ColumnChunkReader() = default;
// Initialize chunk reader, will generate the decoder and codec.
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 170a29b3e94..74fe4c3601b 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -105,8 +105,8 @@ static void fill_array_offset(FieldSchema* field, ColumnArray::Offsets64& offset
 Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field,
                                    const tparquet::RowGroup& row_group,
-                                   const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
-                                   io::IOContext* io_ctx,
+                                   const std::vector<RowRange>& row_ranges,
+                                   const cctz::time_zone* ctz, io::IOContext* io_ctx,
                                    std::unique_ptr<ParquetColumnReader>& reader,
                                    size_t max_buf_size, const tparquet::OffsetIndex* offset_index) {
if (field->data_type->get_primitive_type() == TYPE_ARRAY) {
@@ -248,6 +248,8 @@ Status ScalarColumnReader::_read_values(size_t num_values, ColumnPtr& doris_colu
     NullMap* map_data_column = nullptr;
     if (doris_column->is_nullable()) {
         SCOPED_RAW_TIMER(&_decode_null_map_time);
+        // doris_column either originates from a mutable block in vparquet_group_reader
+        // or is a newly created ColumnPtr, and therefore can be modified.
         auto* nullable_column =
                 assert_cast<vectorized::ColumnNullable*>(const_cast<IColumn*>(doris_column.get()));
@@ -406,6 +408,8 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType
     NullMap* map_data_column = nullptr;
     if (doris_column->is_nullable()) {
         SCOPED_RAW_TIMER(&_decode_null_map_time);
+        // doris_column either originates from a mutable block in vparquet_group_reader
+        // or is a newly created ColumnPtr, and therefore can be modified.
         auto* nullable_column = const_cast<vectorized::ColumnNullable*>(
                 assert_cast<const vectorized::ColumnNullable*>(doris_column.get()));
data_column = nullable_column->get_nested_column_ptr();
@@ -578,7 +582,7 @@ Status ScalarColumnReader::_try_load_dict_page(bool* loaded, bool* has_dict) {
 }
 Status ScalarColumnReader::read_column_data(
-        ColumnPtr& doris_column, DataTypePtr& type,
+        ColumnPtr& doris_column, const DataTypePtr& type,
         const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node, FilterMap& filter_map,
size_t batch_size, size_t* read_rows, bool* eof, bool is_dict_filter) {
if (_converter == nullptr) {
@@ -592,6 +596,7 @@ Status ScalarColumnReader::read_column_data(
_field_schema->data_type->get_name(), type->get_name());
}
}
+    // !FIXME: We should verify whether the get_physical_column logic is correct: why do we return a doris_column?
     ColumnPtr resolved_column =
             _converter->get_physical_column(_field_schema->physical_type, _field_schema->data_type,
                                             doris_column, type, is_dict_filter);
@@ -694,7 +699,7 @@ Status ArrayColumnReader::init(std::unique_ptr<ParquetColumnReader> element_read
 }
 Status ArrayColumnReader::read_column_data(
-        ColumnPtr& doris_column, DataTypePtr& type,
+        ColumnPtr& doris_column, const DataTypePtr& type,
         const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node, FilterMap& filter_map,
size_t batch_size, size_t* read_rows, bool* eof, bool is_dict_filter) {
MutableColumnPtr data_column;
@@ -717,8 +722,8 @@ Status ArrayColumnReader::read_column_data(
}
     ColumnPtr& element_column = assert_cast<ColumnArray&>(*data_column).get_data_ptr();
-    auto& element_type = const_cast<DataTypePtr&>(
-            (assert_cast<const DataTypeArray*>(remove_nullable(type).get()))->get_nested_type());
+    const DataTypePtr& element_type =
+            (assert_cast<const DataTypeArray*>(remove_nullable(type).get()))->get_nested_type();
     // read nested column
     RETURN_IF_ERROR(_element_reader->read_column_data(element_column, element_type,
                                                       root_node->get_element_node(), filter_map,
@@ -746,7 +751,7 @@ Status MapColumnReader::init(std::unique_ptr<ParquetColumnReader> key_reader,
 }
 Status MapColumnReader::read_column_data(
-        ColumnPtr& doris_column, DataTypePtr& type,
+        ColumnPtr& doris_column, const DataTypePtr& type,
         const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node, FilterMap& filter_map,
size_t batch_size, size_t* read_rows, bool* eof, bool is_dict_filter) {
MutableColumnPtr data_column;
@@ -769,10 +774,10 @@ Status MapColumnReader::read_column_data(
}
auto& map = assert_cast<ColumnMap&>(*data_column);
-    auto& key_type = const_cast<DataTypePtr&>(
-            assert_cast<const DataTypeMap*>(remove_nullable(type).get())->get_key_type());
-    auto& value_type = const_cast<DataTypePtr&>(
-            assert_cast<const DataTypeMap*>(remove_nullable(type).get())->get_value_type());
+    const DataTypePtr& key_type =
+            assert_cast<const DataTypeMap*>(remove_nullable(type).get())->get_key_type();
+    const DataTypePtr& value_type =
+            assert_cast<const DataTypeMap*>(remove_nullable(type).get())->get_value_type();
ColumnPtr& key_column = map.get_keys_ptr();
ColumnPtr& value_column = map.get_values_ptr();
@@ -819,7 +824,7 @@ Status StructColumnReader::init(
return Status::OK();
}
Status StructColumnReader::read_column_data(
- ColumnPtr& doris_column, DataTypePtr& type,
+ ColumnPtr& doris_column, const DataTypePtr& type,
         const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node, FilterMap& filter_map,
size_t batch_size, size_t* read_rows, bool* eof, bool is_dict_filter) {
MutableColumnPtr data_column;
@@ -851,8 +856,8 @@ Status StructColumnReader::read_column_data(
for (size_t i = 0; i < doris_struct.tuple_size(); ++i) {
ColumnPtr& doris_field = doris_struct.get_column_ptr(i);
-        auto& doris_type = const_cast<DataTypePtr&>(doris_struct_type->get_element(i));
-        auto& doris_name = const_cast<String&>(doris_struct_type->get_element_name(i));
+ auto& doris_type = doris_struct_type->get_element(i);
+ auto& doris_name = doris_struct_type->get_element_name(i);
if (!root_node->children_column_exists(doris_name)) {
missing_column_idxs.push_back(i);
continue;
@@ -911,7 +916,7 @@ Status StructColumnReader::read_column_data(
// fill missing column with null or default value
for (auto idx : missing_column_idxs) {
auto& doris_field = doris_struct.get_column_ptr(idx);
-        auto& doris_type = const_cast<DataTypePtr&>(doris_struct_type->get_element(idx));
+        auto& doris_type = doris_struct_type->get_element(idx);
         DCHECK(doris_type->is_nullable());
         auto mutable_column = doris_field->assume_mutable();
         auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
index 80f13f629bc..f0d02b0d3e4 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
@@ -116,11 +116,11 @@ public:
}
};
-    ParquetColumnReader(const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
+    ParquetColumnReader(const std::vector<RowRange>& row_ranges, const cctz::time_zone* ctz,
                         io::IOContext* io_ctx)
             : _row_ranges(row_ranges), _ctz(ctz), _io_ctx(io_ctx) {}
     virtual ~ParquetColumnReader() = default;
-    virtual Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
+    virtual Status read_column_data(ColumnPtr& doris_column, const DataTypePtr& type,
                                     const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node,
                                     FilterMap& filter_map, size_t batch_size, size_t* read_rows,
                                     bool* eof, bool is_dict_filter) = 0;
@@ -136,7 +136,7 @@ public:
static Status create(io::FileReaderSPtr file, FieldSchema* field,
const tparquet::RowGroup& row_group,
-                         const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
+                         const std::vector<RowRange>& row_ranges, const cctz::time_zone* ctz,
                          io::IOContext* io_ctx, std::unique_ptr<ParquetColumnReader>& reader,
                          size_t max_buf_size, const tparquet::OffsetIndex* offset_index = nullptr);
void set_nested_column() { _nested_column = true; }
@@ -155,7 +155,7 @@ protected:
     // When scalar column is the child of nested column, we should turn off the filtering by page index and lazy read.
bool _nested_column = false;
const std::vector<RowRange>& _row_ranges;
- cctz::time_zone* _ctz = nullptr;
+ const cctz::time_zone* _ctz = nullptr;
io::IOContext* _io_ctx = nullptr;
int64_t _current_row_index = 0;
int _row_range_index = 0;
@@ -169,14 +169,14 @@ class ScalarColumnReader : public ParquetColumnReader {
public:
ScalarColumnReader(const std::vector<RowRange>& row_ranges,
const tparquet::ColumnChunk& chunk_meta,
-                       const tparquet::OffsetIndex* offset_index, cctz::time_zone* ctz,
+                       const tparquet::OffsetIndex* offset_index, const cctz::time_zone* ctz,
                        io::IOContext* io_ctx)
             : ParquetColumnReader(row_ranges, ctz, io_ctx),
               _chunk_meta(chunk_meta),
               _offset_index(offset_index) {}
     ~ScalarColumnReader() override { close(); }
     Status init(io::FileReaderSPtr file, FieldSchema* field, size_t max_buf_size);
-    Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
+    Status read_column_data(ColumnPtr& doris_column, const DataTypePtr& type,
                             const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node,
                             FilterMap& filter_map, size_t batch_size, size_t* read_rows, bool* eof,
                             bool is_dict_filter) override;
@@ -218,12 +218,12 @@ private:
class ArrayColumnReader : public ParquetColumnReader {
ENABLE_FACTORY_CREATOR(ArrayColumnReader)
public:
-    ArrayColumnReader(const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
+    ArrayColumnReader(const std::vector<RowRange>& row_ranges, const cctz::time_zone* ctz,
                       io::IOContext* io_ctx)
             : ParquetColumnReader(row_ranges, ctz, io_ctx) {}
     ~ArrayColumnReader() override { close(); }
     Status init(std::unique_ptr<ParquetColumnReader> element_reader, FieldSchema* field);
-    Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
+    Status read_column_data(ColumnPtr& doris_column, const DataTypePtr& type,
                             const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node,
                             FilterMap& filter_map, size_t batch_size, size_t* read_rows, bool* eof,
                             bool is_dict_filter) override;
@@ -245,14 +245,14 @@ private:
class MapColumnReader : public ParquetColumnReader {
ENABLE_FACTORY_CREATOR(MapColumnReader)
public:
-    MapColumnReader(const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
+    MapColumnReader(const std::vector<RowRange>& row_ranges, const cctz::time_zone* ctz,
                     io::IOContext* io_ctx)
             : ParquetColumnReader(row_ranges, ctz, io_ctx) {}
     ~MapColumnReader() override { close(); }
     Status init(std::unique_ptr<ParquetColumnReader> key_reader,
                 std::unique_ptr<ParquetColumnReader> value_reader, FieldSchema* field);
-    Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
+    Status read_column_data(ColumnPtr& doris_column, const DataTypePtr& type,
                             const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node,
                             FilterMap& filter_map, size_t batch_size, size_t* read_rows, bool* eof,
                             bool is_dict_filter) override;
@@ -286,7 +286,7 @@ private:
class StructColumnReader : public ParquetColumnReader {
ENABLE_FACTORY_CREATOR(StructColumnReader)
public:
-    StructColumnReader(const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
+    StructColumnReader(const std::vector<RowRange>& row_ranges, const cctz::time_zone* ctz,
io::IOContext* io_ctx)
: ParquetColumnReader(row_ranges, ctz, io_ctx) {}
~StructColumnReader() override { close(); }
@@ -294,7 +294,7 @@ public:
Status init(
             std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>>&& child_readers,
             FieldSchema* field);
-    Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
+    Status read_column_data(ColumnPtr& doris_column, const DataTypePtr& type,
                             const std::shared_ptr<TableSchemaChangeHelper::Node>& root_node,
                             FilterMap& filter_map, size_t batch_size, size_t* read_rows, bool* eof,
                             bool is_dict_filter) override;
diff --git a/be/src/vec/exec/format/parquet/vparquet_file_metadata.h b/be/src/vec/exec/format/parquet/vparquet_file_metadata.h
index 93ac499ff46..5090c42602d 100644
--- a/be/src/vec/exec/format/parquet/vparquet_file_metadata.h
+++ b/be/src/vec/exec/format/parquet/vparquet_file_metadata.h
@@ -31,6 +31,7 @@ public:
~FileMetaData();
Status init_schema();
const FieldDescriptor& schema() const { return _schema; }
+ FieldDescriptor& schema() { return _schema; }
const tparquet::FileMetaData& to_thrift() const;
std::string debug_string() const;
size_t get_mem_size() const { return _mem_size; }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 96feeceea8d..7d1f9d27430 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -80,7 +80,7 @@ static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limi
 RowGroupReader::RowGroupReader(io::FileReaderSPtr file_reader,
                                const std::vector<std::string>& read_columns,
                                const int32_t row_group_id, const tparquet::RowGroup& row_group,
-                               cctz::time_zone* ctz, io::IOContext* io_ctx,
+                               const cctz::time_zone* ctz, io::IOContext* io_ctx,
                                const PositionDeleteContext& position_delete_ctx,
                                const LazyReadContext& lazy_read_ctx, RuntimeState* state)
: _file_reader(file_reader),
@@ -123,7 +123,7 @@ Status RowGroupReader::init(
for (const auto& read_table_col : _read_table_columns) {
         auto read_file_col = _table_info_node_ptr->children_file_column_name(read_table_col);
-        auto* field = const_cast<FieldSchema*>(schema.get_column(read_file_col));
+        auto* field = schema.get_column(read_file_col);
auto physical_index = field->physical_column_index;
std::unique_ptr<ParquetColumnReader> reader;
         // TODO : support nested column types
@@ -157,7 +157,7 @@ Status RowGroupReader::init(
int slot_id = predicate_col_slot_ids[i];
             auto predicate_file_col_name =
                     _table_info_node_ptr->children_file_column_name(predicate_col_name);
-            auto field = const_cast<FieldSchema*>(schema.get_column(predicate_file_col_name));
+            auto field = schema.get_column(predicate_file_col_name);
             if (!disable_dict_filter && !_lazy_read_ctx.has_complex_type &&
                 _can_filter_by_dict(
                         slot_id,
                         _row_group_meta.columns[field->physical_column_index].meta_data)) {
@@ -662,6 +662,7 @@ Status RowGroupReader::_fill_partition_columns(
DataTypeSerDe::FormatOptions _text_formatOptions;
for (auto& kv : partition_columns) {
auto doris_column = block->get_by_name(kv.first).column;
+        // Obtained from a Block*, so the underlying column is a mutable object.
IColumn* col_ptr = const_cast<IColumn*>(doris_column.get());
auto& [value, slot_desc] = kv.second;
auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
index 9d0a59a4e91..86b7cdf3f0e 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
@@ -148,7 +148,7 @@ public:
     RowGroupReader(io::FileReaderSPtr file_reader, const std::vector<std::string>& read_columns,
                    const int32_t row_group_id, const tparquet::RowGroup& row_group,
- cctz::time_zone* ctz, io::IOContext* io_ctx,
+ const cctz::time_zone* ctz, io::IOContext* io_ctx,
const PositionDeleteContext& position_delete_ctx,
const LazyReadContext& lazy_read_ctx, RuntimeState* state);
@@ -225,7 +225,7 @@ private:
const int32_t _row_group_id;
const tparquet::RowGroup& _row_group_meta;
int64_t _remaining_rows;
- cctz::time_zone* _ctz = nullptr;
+ const cctz::time_zone* _ctz = nullptr;
io::IOContext* _io_ctx = nullptr;
PositionDeleteContext _position_delete_ctx;
// merge the row ranges generated from page index and position delete.
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 787161efc22..3ffecd1c9c5 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -83,9 +83,9 @@ namespace doris::vectorized {
#include "common/compile_check_begin.h"
 ParquetReader::ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
-                             const TFileRangeDesc& range, size_t batch_size, cctz::time_zone* ctz,
-                             io::IOContext* io_ctx, RuntimeState* state, FileMetaCache* meta_cache,
-                             bool enable_lazy_mat)
+                             const TFileRangeDesc& range, size_t batch_size,
+                             const cctz::time_zone* ctz, io::IOContext* io_ctx, RuntimeState* state,
+                             FileMetaCache* meta_cache, bool enable_lazy_mat)
: _profile(profile),
_scan_params(params),
_scan_range(range),
@@ -826,12 +826,12 @@ RowGroupReader::PositionDeleteContext ParquetReader::_get_position_delete_ctx(
     if (_delete_rows == nullptr) {
         return RowGroupReader::PositionDeleteContext(row_group.num_rows, row_group_index.first_row);
     }
-    int64_t* delete_rows = const_cast<int64_t*>(&(*_delete_rows)[0]);
-    int64_t* delete_rows_end = delete_rows + _delete_rows->size();
-    int64_t* start_pos = std::lower_bound(delete_rows + _delete_rows_index, delete_rows_end,
-                                          row_group_index.first_row);
+    const int64_t* delete_rows = &(*_delete_rows)[0];
+    const int64_t* delete_rows_end = delete_rows + _delete_rows->size();
+    const int64_t* start_pos = std::lower_bound(delete_rows + _delete_rows_index, delete_rows_end,
+                                                row_group_index.first_row);
     int64_t start_index = start_pos - delete_rows;
-    int64_t* end_pos = std::lower_bound(start_pos, delete_rows_end, row_group_index.last_row);
+    const int64_t* end_pos = std::lower_bound(start_pos, delete_rows_end, row_group_index.last_row);
     int64_t end_index = end_pos - delete_rows;
     _delete_rows_index = end_index;
     return RowGroupReader::PositionDeleteContext(*_delete_rows, row_group.num_rows,
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h
index 293f0549b6a..3b9a6b04db9 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.h
@@ -99,7 +99,7 @@ public:
};
ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
-                  const TFileRangeDesc& range, size_t batch_size, cctz::time_zone* ctz,
+                  const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
                   io::IOContext* io_ctx, RuntimeState* state, FileMetaCache* meta_cache = nullptr,
                   bool enable_lazy_mat = true);
@@ -296,7 +296,7 @@ private:
size_t _batch_size;
int64_t _range_start_offset;
int64_t _range_size;
- cctz::time_zone* _ctz = nullptr;
+ const cctz::time_zone* _ctz = nullptr;
std::unordered_map<int, tparquet::OffsetIndex> _col_offsets;
diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp
index a9640e353b1..db6e44c3d4b 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.cpp
+++ b/be/src/vec/exec/format/table/iceberg_reader.cpp
@@ -295,12 +295,12 @@ IcebergTableReader::PositionDeleteRange IcebergTableReader::_get_range(
const ColumnDictI32& file_path_column) {
IcebergTableReader::PositionDeleteRange range;
size_t read_rows = file_path_column.get_data().size();
- int* code_path = const_cast<int*>(file_path_column.get_data().data());
- int* code_path_start = code_path;
- int* code_path_end = code_path + read_rows;
+ const int* code_path = file_path_column.get_data().data();
+ const int* code_path_start = code_path;
+ const int* code_path_end = code_path + read_rows;
while (code_path < code_path_end) {
int code = code_path[0];
- int* code_end = std::upper_bound(code_path, code_path_end, code);
+ const int* code_end = std::upper_bound(code_path, code_path_end, code);
range.data_file_path.emplace_back(file_path_column.get_value(code).to_string());
         range.range.emplace_back(code_path - code_path_start, code_end - code_path_start);
code_path = code_end;
@@ -454,9 +454,9 @@ Status IcebergParquetReader::init_reader(
Status IcebergParquetReader ::_read_position_delete_file(const TFileRangeDesc* delete_range,
                                                         DeleteFile* position_delete) {
-    ParquetReader parquet_delete_reader(
-            _profile, _params, *delete_range, READ_DELETE_FILE_BATCH_SIZE,
-            const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx, _state, _meta_cache);
+    ParquetReader parquet_delete_reader(_profile, _params, *delete_range,
+                                        READ_DELETE_FILE_BATCH_SIZE, &_state->timezone_obj(),
+                                        _io_ctx, _state, _meta_cache);
     RETURN_IF_ERROR(parquet_delete_reader.init_reader(
             delete_file_col_names, nullptr, {}, nullptr, nullptr, nullptr, nullptr, nullptr,
             TableSchemaChangeHelper::ConstNode::get_instance(), false));
diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h
index 188e31f0378..6b0e3fe4b3e 100644
--- a/be/src/vec/exec/format/table/iceberg_reader.h
+++ b/be/src/vec/exec/format/table/iceberg_reader.h
@@ -182,8 +182,7 @@ protected:
std::unique_ptr<GenericReader> _create_equality_reader(
const TFileRangeDesc& delete_desc) final {
return ParquetReader::create_unique(_profile, _params, delete_desc,
-                                            READ_DELETE_FILE_BATCH_SIZE,
-                                            const_cast<cctz::time_zone*>(&_state->timezone_obj()),
+                                            READ_DELETE_FILE_BATCH_SIZE, &_state->timezone_obj(),
_io_ctx, _state, _meta_cache);
}
diff --git a/be/src/vec/exec/jni_connector.cpp b/be/src/vec/exec/jni_connector.cpp
index 91457cb7718..87a6ec8c224 100644
--- a/be/src/vec/exec/jni_connector.cpp
+++ b/be/src/vec/exec/jni_connector.cpp
@@ -337,7 +337,7 @@ Status JniConnector::_fill_block(Block* block, size_t num_rows) {
 }
 Status JniConnector::_fill_column(TableMetaAddress& address, ColumnPtr& doris_column,
-                                  DataTypePtr& data_type, size_t num_rows) {
+                                  const DataTypePtr& data_type, size_t num_rows) {
auto logical_type = data_type->get_primitive_type();
void* null_map_ptr = address.next_meta_as_ptr();
if (null_map_ptr == nullptr) {
@@ -410,9 +410,9 @@ Status JniConnector::_fill_varbinary_column(TableMetaAddress& address,
 Status JniConnector::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          size_t num_rows) {
-    const auto& string_col = static_cast<const ColumnString&>(*doris_column);
-    auto& string_chars = const_cast<ColumnString::Chars&>(string_col.get_chars());
-    auto& string_offsets = const_cast<ColumnString::Offsets&>(string_col.get_offsets());
+    auto& string_col = static_cast<ColumnString&>(*doris_column);
+    ColumnString::Chars& string_chars = string_col.get_chars();
+    ColumnString::Offsets& string_offsets = string_col.get_offsets();
+ auto& string_col = static_cast<ColumnString&>(*doris_column);
+ ColumnString::Chars& string_chars = string_col.get_chars();
+ ColumnString::Offsets& string_offsets = string_col.get_offsets();
int* offsets = reinterpret_cast<int*>(address.next_meta_as_ptr());
char* chars = reinterpret_cast<char*>(address.next_meta_as_ptr());
@@ -438,11 +438,11 @@ Status JniConnector::_fill_string_column(TableMetaAddress& address, MutableColum
 }
 Status JniConnector::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
-                                        DataTypePtr& data_type, size_t num_rows) {
+                                        const DataTypePtr& data_type, size_t num_rows) {
     ColumnPtr& element_column = static_cast<ColumnArray&>(*doris_column).get_data_ptr();
-    DataTypePtr& element_type = const_cast<DataTypePtr&>(
+    const DataTypePtr& element_type =
             (assert_cast<const DataTypeArray*>(remove_nullable(data_type).get()))
-                    ->get_nested_type());
+                    ->get_nested_type();
     ColumnArray::Offsets64& offsets_data = static_cast<ColumnArray&>(*doris_column).get_offsets();
int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
@@ -460,13 +460,13 @@ Status JniConnector::_fill_array_column(TableMetaAddress& address, MutableColumn
 }
 Status JniConnector::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
-                                      DataTypePtr& data_type, size_t num_rows) {
+                                      const DataTypePtr& data_type, size_t num_rows) {
     auto& map = static_cast<ColumnMap&>(*doris_column);
-    DataTypePtr& key_type = const_cast<DataTypePtr&>(
-            reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())->get_key_type());
-    DataTypePtr& value_type = const_cast<DataTypePtr&>(
+    const DataTypePtr& key_type =
+            reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())->get_key_type();
+    const DataTypePtr& value_type =
             reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())
-                    ->get_value_type());
+                    ->get_value_type();
ColumnPtr& key_column = map.get_keys_ptr();
ColumnPtr& value_column = map.get_values_ptr();
ColumnArray::Offsets64& map_offsets = map.get_offsets();
@@ -487,13 +487,13 @@ Status JniConnector::_fill_map_column(TableMetaAddress& address, MutableColumnPt
 }
 Status JniConnector::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
-                                         DataTypePtr& data_type, size_t num_rows) {
+                                         const DataTypePtr& data_type, size_t num_rows) {
     auto& doris_struct = static_cast<ColumnStruct&>(*doris_column);
     const DataTypeStruct* doris_struct_type =
             reinterpret_cast<const DataTypeStruct*>(remove_nullable(data_type).get());
     for (int i = 0; i < doris_struct.tuple_size(); ++i) {
         ColumnPtr& struct_field = doris_struct.get_column_ptr(i);
-        DataTypePtr& field_type = const_cast<DataTypePtr&>(doris_struct_type->get_element(i));
+        const DataTypePtr& field_type = doris_struct_type->get_element(i);
         RETURN_IF_ERROR(_fill_column(address, struct_field, field_type, num_rows));
}
return Status::OK();
diff --git a/be/src/vec/exec/jni_connector.h b/be/src/vec/exec/jni_connector.h
index 442c619be4e..f55a7f243fe 100644
--- a/be/src/vec/exec/jni_connector.h
+++ b/be/src/vec/exec/jni_connector.h
@@ -329,7 +329,7 @@ private:
Status _fill_block(Block* block, size_t num_rows);
     static Status _fill_column(TableMetaAddress& address, ColumnPtr& doris_column,
-                               DataTypePtr& data_type, size_t num_rows);
+                               const DataTypePtr& data_type, size_t num_rows);
     static Status _fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
size_t num_rows);
@@ -338,13 +338,13 @@ private:
size_t num_rows);
     static Status _fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
-                                   DataTypePtr& data_type, size_t num_rows);
+                                   const DataTypePtr& data_type, size_t num_rows);
     static Status _fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
-                                     DataTypePtr& data_type, size_t num_rows);
+                                     const DataTypePtr& data_type, size_t num_rows);
     static Status _fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
-                                      DataTypePtr& data_type, size_t num_rows);
+                                      const DataTypePtr& data_type, size_t num_rows);
     static Status _fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type,
                                     std::vector<long>& meta_data);
diff --git a/be/src/vec/exec/scan/file_scanner.cpp b/be/src/vec/exec/scan/file_scanner.cpp
index 976dc7ade4e..9b0fc999117 100644
--- a/be/src/vec/exec/scan/file_scanner.cpp
+++ b/be/src/vec/exec/scan/file_scanner.cpp
@@ -632,6 +632,7 @@ Status FileScanner::_fill_columns_from_path(size_t rows) {
DataTypeSerDe::FormatOptions _text_formatOptions;
for (auto& kv : _partition_col_descs) {
auto doris_column = _src_block_ptr->get_by_name(kv.first).column;
+        // _src_block_ptr points to a mutable block created by this class itself, so const_cast can be used here.
IColumn* col_ptr = const_cast<IColumn*>(doris_column.get());
auto& [value, slot_desc] = kv.second;
auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
@@ -1034,8 +1035,8 @@ Status FileScanner::_get_next_reader() {
: nullptr;
             std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique(
                     _profile, *_params, range, _state->query_options().batch_size,
-                    const_cast<cctz::time_zone*>(&_state->timezone_obj()), _io_ctx.get(), _state,
-                    file_meta_cache_ptr, _state->query_options().enable_parquet_lazy_mat);
+                    &_state->timezone_obj(), _io_ctx.get(), _state, file_meta_cache_ptr,
+                    _state->query_options().enable_parquet_lazy_mat);
if (_row_id_column_iterator_pair.second != -1) {
RETURN_IF_ERROR(_create_row_id_column_iterator());
@@ -1479,8 +1480,7 @@ Status FileScanner::read_lines_from_range(const TFileRangeDesc& range,
     case TFileFormatType::FORMAT_PARQUET: {
         std::unique_ptr<vectorized::ParquetReader> parquet_reader =
                 vectorized::ParquetReader::create_unique(
-                        _profile, *_params, range, 1,
-                        const_cast<cctz::time_zone*>(&_state->timezone_obj()),
+                        _profile, *_params, range, 1, &_state->timezone_obj(),
                         _io_ctx.get(), _state, file_meta_cache_ptr, false);
RETURN_IF_ERROR(parquet_reader->read_by_rows(row_ids));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]