This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new dd53bc1c8d [unify type system](remove unused type desc) remove some
code (#17921)
dd53bc1c8d is described below
commit dd53bc1c8dbac94f945a24a442e1a24dab22ebbe
Author: yiguolei <[email protected]>
AuthorDate: Sun Mar 19 14:05:02 2023 +0800
[unify type system](remove unused type desc) remove some code (#17921)
There are many type definitions in BE. We should unify the type system and
simplify development.
---------
Co-authored-by: yiguolei <[email protected]>
---
be/src/exec/base_scanner.cpp | 1 -
be/src/exec/schema_scanner.cpp | 74 ---------
be/src/exec/schema_scanner.h | 3 -
be/src/exec/tablet_info.cpp | 1 +
be/src/exec/tablet_info.h | 1 -
be/src/runtime/buffer_control_block.cpp | 1 -
be/src/runtime/collection_value.cpp | 1 -
be/src/runtime/collection_value.h | 1 -
be/src/runtime/raw_value.h | 183 +---------------------
be/src/runtime/result_buffer_mgr.cpp | 1 -
be/src/runtime/result_queue_mgr.h | 1 -
be/src/runtime/result_writer.h | 1 -
be/src/runtime/struct_value.h | 2 -
be/src/util/CMakeLists.txt | 1 -
be/src/util/arrow/row_block.cpp | 127 ---------------
be/src/util/arrow/row_block.h | 38 -----
be/src/util/hash_util.hpp | 1 +
be/src/vec/columns/column_const.cpp | 3 +-
be/src/vec/core/block.cpp | 11 --
be/src/vec/core/block.h | 3 -
be/src/vec/exec/scan/vfile_scanner.cpp | 1 -
be/src/vec/runtime/vdata_stream_mgr.cpp | 7 +-
be/src/vec/utils/arrow_column_to_doris_column.cpp | 55 -------
be/src/vec/utils/arrow_column_to_doris_column.h | 2 -
be/test/olap/row_cursor_test.cpp | 2 -
be/test/vec/exec/parquet/parquet_thrift_test.cpp | 83 ++++++++--
be/test/vec/exprs/vexpr_test.cpp | 76 ++++++++-
27 files changed, 154 insertions(+), 527 deletions(-)
diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp
index edb057a55d..84329c832d 100644
--- a/be/src/exec/base_scanner.cpp
+++ b/be/src/exec/base_scanner.cpp
@@ -23,7 +23,6 @@
#include "common/utils.h"
#include "exec/exec_node.h"
#include "runtime/descriptors.h"
-#include "runtime/raw_value.h"
#include "runtime/runtime_state.h"
#include "vec/data_types/data_type_factory.hpp"
diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp
index adc52d024d..7b8e625d61 100644
--- a/be/src/exec/schema_scanner.cpp
+++ b/be/src/exec/schema_scanner.cpp
@@ -84,8 +84,6 @@ Status SchemaScanner::init(SchemaScannerParam* param,
ObjectPool* pool) {
return Status::InternalError("invalid parameter");
}
- RETURN_IF_ERROR(create_tuple_desc(pool));
-
_param = param;
_is_init = true;
@@ -302,76 +300,4 @@ Status
SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_
return Status::OK();
}
-Status SchemaScanner::create_tuple_desc(ObjectPool* pool) {
- int null_column = 0;
- for (int i = 0; i < _columns.size(); ++i) {
- if (_columns[i].is_null) {
- null_column++;
- }
- }
-
- int offset = (null_column + 7) / 8;
- std::vector<SlotDescriptor*> slots;
- int null_byte = 0;
- int null_bit = 0;
-
- for (int i = 0; i < _columns.size(); ++i) {
- TSlotDescriptor t_slot_desc;
- if (_columns[i].type == TYPE_DECIMALV2) {
-
t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27,
9).to_thrift());
- } else {
- TypeDescriptor descriptor(_columns[i].type);
- if (_columns[i].precision >= 0 && _columns[i].scale >= 0) {
- descriptor.precision = _columns[i].precision;
- descriptor.scale = _columns[i].scale;
- }
- t_slot_desc.__set_slotType(descriptor.to_thrift());
- }
- t_slot_desc.__set_colName(_columns[i].name);
- t_slot_desc.__set_columnPos(i);
- t_slot_desc.__set_byteOffset(offset);
-
- if (_columns[i].is_null) {
- t_slot_desc.__set_nullIndicatorByte(null_byte);
- t_slot_desc.__set_nullIndicatorBit(null_bit);
- null_bit = (null_bit + 1) % 8;
-
- if (0 == null_bit) {
- null_byte++;
- }
- } else {
- t_slot_desc.__set_nullIndicatorByte(0);
- t_slot_desc.__set_nullIndicatorBit(-1);
- }
-
- t_slot_desc.id = i;
- t_slot_desc.__set_slotIdx(i);
- t_slot_desc.__set_isMaterialized(true);
-
- SlotDescriptor* slot = pool->add(new (std::nothrow)
SlotDescriptor(t_slot_desc));
-
- if (nullptr == slot) {
- return Status::InternalError("no memory for _tuple_desc.");
- }
-
- slots.push_back(slot);
- offset += _columns[i].size;
- }
-
- TTupleDescriptor t_tuple_desc;
- t_tuple_desc.__set_byteSize(offset);
- t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);
- _tuple_desc = pool->add(new (std::nothrow) TupleDescriptor(t_tuple_desc));
-
- if (nullptr == _tuple_desc) {
- return Status::InternalError("no memory for _tuple_desc.");
- }
-
- for (int i = 0; i < slots.size(); ++i) {
- _tuple_desc->add_slot(slots[i]);
- }
-
- return Status::OK();
-}
-
} // namespace doris
diff --git a/be/src/exec/schema_scanner.h b/be/src/exec/schema_scanner.h
index 4f7faac06c..ca62f5ac02 100644
--- a/be/src/exec/schema_scanner.h
+++ b/be/src/exec/schema_scanner.h
@@ -88,7 +88,6 @@ public:
const std::vector<ColumnDesc>& get_column_desc() const { return _columns; }
// factory function
static SchemaScanner* create(TSchemaTableType::type type);
- const TupleDescriptor* tuple_desc() const { return _tuple_desc; }
TSchemaTableType::type type() const { return _schema_table_type; }
static void set_doris_server(DorisServer* doris_server) { _s_doris_server
= doris_server; }
@@ -96,14 +95,12 @@ public:
protected:
Status fill_dest_column_for_range(vectorized::Block* block, size_t pos,
const std::vector<void*>& datas);
- Status create_tuple_desc(ObjectPool* pool);
bool _is_init;
// this is used for sub class
SchemaScannerParam* _param;
// schema table's column desc
std::vector<ColumnDesc> _columns;
- TupleDescriptor* _tuple_desc;
static DorisServer* _s_doris_server;
diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index d2d2e07a5c..a5c53294f1 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -18,6 +18,7 @@
#include "exec/tablet_info.h"
#include "runtime/large_int_value.h"
+#include "runtime/raw_value.h"
#include "util/string_parser.hpp"
namespace doris {
diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h
index 9753b71349..3556adb13f 100644
--- a/be/src/exec/tablet_info.h
+++ b/be/src/exec/tablet_info.h
@@ -29,7 +29,6 @@
#include "gen_cpp/descriptors.pb.h"
#include "olap/tablet_schema.h"
#include "runtime/descriptors.h"
-#include "runtime/raw_value.h"
#include "vec/core/block.h"
namespace doris {
diff --git a/be/src/runtime/buffer_control_block.cpp
b/be/src/runtime/buffer_control_block.cpp
index 7fa0c50923..89ee93a012 100644
--- a/be/src/runtime/buffer_control_block.cpp
+++ b/be/src/runtime/buffer_control_block.cpp
@@ -20,7 +20,6 @@
#include "gen_cpp/PaloInternalService_types.h"
#include "gen_cpp/internal_service.pb.h"
#include "runtime/exec_env.h"
-#include "runtime/raw_value.h"
#include "runtime/thread_context.h"
#include "service/brpc.h"
#include "util/thrift_util.h"
diff --git a/be/src/runtime/collection_value.cpp
b/be/src/runtime/collection_value.cpp
index 255fce195b..f4543d361d 100644
--- a/be/src/runtime/collection_value.cpp
+++ b/be/src/runtime/collection_value.cpp
@@ -22,7 +22,6 @@
#include "common/object_pool.h"
#include "common/utils.h"
#include "runtime/mem_pool.h"
-#include "runtime/raw_value.h"
#include "runtime/types.h"
#include "vec/common/string_ref.h"
diff --git a/be/src/runtime/collection_value.h
b/be/src/runtime/collection_value.h
index 6b59066f97..02a51e4bd1 100644
--- a/be/src/runtime/collection_value.h
+++ b/be/src/runtime/collection_value.h
@@ -36,7 +36,6 @@ struct ArrayIteratorFunctionsBase;
class ArrayIterator;
class Status;
class ObjectPool;
-struct TypeDescriptor;
template <PrimitiveType type>
struct ArrayIteratorFunctions;
diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h
index 32633d0ae7..b1e635ea16 100644
--- a/be/src/runtime/raw_value.h
+++ b/be/src/runtime/raw_value.h
@@ -36,191 +36,14 @@ class SlotDescriptor;
// Useful utility functions for runtime values (which are passed around as
void*).
class RawValue {
public:
- // Ascii output precision for double/float
- static const int ASCII_PRECISION;
-
- static uint32_t get_hash_value(const void* value, const PrimitiveType&
type) {
- return get_hash_value(value, type, 0);
- }
-
- static uint32_t get_hash_value(const void* value, const PrimitiveType&
type, uint32_t seed);
-
- // Returns hash value for 'value' interpreted as 'type'. The resulting
hash value
- // is combined with the seed value.
- static uint32_t get_hash_value(const void* value, const TypeDescriptor&
type, uint32_t seed) {
- return get_hash_value(value, type.type, seed);
- }
-
- static uint32_t get_hash_value(const void* value, const TypeDescriptor&
type) {
- return get_hash_value(value, type.type, 0);
- }
-
- // Get the hash value using the fvn hash function. Using different seeds
with FVN
- // results in different hash functions. get_hash_value() does not have
this property
- // and cannot be safely used as the first step in data repartitioning.
- // However, get_hash_value() can be significantly faster.
- // TODO: fix get_hash_value
- static uint32_t zlib_crc32(const void* value, const TypeDescriptor& type,
uint32_t seed);
-
// Same as the up function, only use in vec exec engine.
- static uint32_t zlib_crc32(const void* value, size_t len, const
TypeDescriptor& type,
+ static uint32_t zlib_crc32(const void* value, size_t len, const
PrimitiveType& type,
uint32_t seed);
-
- // Compares both values.
- // Return value is < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
- static int compare(const void* v1, const void* v2, const TypeDescriptor&
type);
-
- // Returns true if v1 == v2.
- // This is more performant than compare() == 0 for string equality, mostly
because of
- // the length comparison check.
- static bool eq(const void* v1, const void* v2, const TypeDescriptor& type);
-
- static bool lt(const void* v1, const void* v2, const TypeDescriptor& type);
};
-// Use boost::hash_combine for corner cases. boost::hash_combine is
reimplemented
-// here to use int32t's (instead of size_t)
-// boost::hash_combine does:
-// seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-inline uint32_t RawValue::get_hash_value(const void* v, const PrimitiveType&
type, uint32_t seed) {
- // Hash_combine with v = 0
- if (v == nullptr) {
- uint32_t value = 0x9e3779b9;
- return seed ^ (value + (seed << 6) + (seed >> 2));
- }
-
- switch (type) {
- case TYPE_VARCHAR:
- case TYPE_CHAR:
- case TYPE_HLL:
- case TYPE_STRING: {
- const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
- return HashUtil::hash(string_value->data, string_value->size, seed);
- }
-
- case TYPE_BOOLEAN: {
- uint32_t value = *reinterpret_cast<const bool*>(v) + 0x9e3779b9;
- return seed ^ (value + (seed << 6) + (seed >> 2));
- }
-
- case TYPE_TINYINT:
- return HashUtil::hash(v, 1, seed);
-
- case TYPE_SMALLINT:
- return HashUtil::hash(v, 2, seed);
-
- case TYPE_INT:
- return HashUtil::hash(v, 4, seed);
-
- case TYPE_BIGINT:
- return HashUtil::hash(v, 8, seed);
-
- case TYPE_FLOAT:
- return HashUtil::hash(v, 4, seed);
-
- case TYPE_DOUBLE:
- return HashUtil::hash(v, 8, seed);
-
- case TYPE_DATE:
- case TYPE_DATETIME:
- return HashUtil::hash(v, 16, seed);
-
- case TYPE_DATEV2:
- return HashUtil::hash(v, 4, seed);
-
- case TYPE_DATETIMEV2:
- return HashUtil::hash(v, 8, seed);
-
- case TYPE_DECIMALV2:
- return HashUtil::hash(v, 16, seed);
- case TYPE_DECIMAL32:
- return HashUtil::hash(v, 4, seed);
- case TYPE_DECIMAL64:
- return HashUtil::hash(v, 8, seed);
- case TYPE_DECIMAL128I:
- return HashUtil::hash(v, 16, seed);
-
- case TYPE_LARGEINT:
- return HashUtil::hash(v, 16, seed);
-
- default:
- DCHECK(false) << "invalid type: " << type;
- return 0;
- }
-}
-
// NOTE: this is just for split data, decimal use old doris hash function
// Because crc32 hardware is not equal with zlib crc32
-inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor&
type, uint32_t seed) {
- // Hash_combine with v = 0
- if (v == nullptr) {
- uint32_t value = 0x9e3779b9;
- return seed ^ (value + (seed << 6) + (seed >> 2));
- }
-
- switch (type.type) {
- case TYPE_VARCHAR:
- case TYPE_HLL:
- case TYPE_STRING: {
- const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
- return HashUtil::zlib_crc_hash(string_value->data, string_value->size,
seed);
- }
- case TYPE_CHAR: {
- // TODO(zc): ugly, use actual value to compute hash value
- const StringRef* string_value = reinterpret_cast<const StringRef*>(v);
- int len = 0;
- while (len < string_value->size) {
- if (string_value->data[len] == '\0') {
- break;
- }
- len++;
- }
- return HashUtil::zlib_crc_hash(string_value->data, len, seed);
- }
- case TYPE_BOOLEAN:
- case TYPE_TINYINT:
- return HashUtil::zlib_crc_hash(v, 1, seed);
- case TYPE_SMALLINT:
- return HashUtil::zlib_crc_hash(v, 2, seed);
- case TYPE_INT:
- case TYPE_DATEV2:
- case TYPE_DECIMAL32:
- return HashUtil::zlib_crc_hash(v, 4, seed);
- case TYPE_BIGINT:
- case TYPE_DATETIMEV2:
- case TYPE_DECIMAL64:
- return HashUtil::zlib_crc_hash(v, 8, seed);
- case TYPE_LARGEINT:
- case TYPE_DECIMAL128I:
- return HashUtil::zlib_crc_hash(v, 16, seed);
- case TYPE_FLOAT:
- return HashUtil::zlib_crc_hash(v, 4, seed);
- case TYPE_DOUBLE:
- return HashUtil::zlib_crc_hash(v, 8, seed);
- case TYPE_DATE:
- case TYPE_DATETIME: {
- const DateTimeValue* date_val = (const DateTimeValue*)v;
- char buf[64];
- int len = date_val->to_buffer(buf);
- return HashUtil::zlib_crc_hash(buf, len, seed);
- }
-
- case TYPE_DECIMALV2: {
- const DecimalV2Value* dec_val = (const DecimalV2Value*)v;
- int64_t int_val = dec_val->int_value();
- int32_t frac_val = dec_val->frac_value();
- seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), seed);
- return HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
- }
- default:
- DCHECK(false) << "invalid type: " << type;
- return 0;
- }
-}
-
-// NOTE: this is just for split data, decimal use old doris hash function
-// Because crc32 hardware is not equal with zlib crc32
-inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const
TypeDescriptor& type,
+inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const
PrimitiveType& type,
uint32_t seed) {
// Hash_combine with v = 0
if (v == nullptr) {
@@ -228,7 +51,7 @@ inline uint32_t RawValue::zlib_crc32(const void* v, size_t
len, const TypeDescri
return seed ^ (value + (seed << 6) + (seed >> 2));
}
- switch (type.type) {
+ switch (type) {
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_STRING:
diff --git a/be/src/runtime/result_buffer_mgr.cpp
b/be/src/runtime/result_buffer_mgr.cpp
index 19fb2522b8..02229efe23 100644
--- a/be/src/runtime/result_buffer_mgr.cpp
+++ b/be/src/runtime/result_buffer_mgr.cpp
@@ -22,7 +22,6 @@
#include "gen_cpp/PaloInternalService_types.h"
#include "gen_cpp/types.pb.h"
#include "runtime/buffer_control_block.h"
-#include "runtime/raw_value.h"
#include "util/doris_metrics.h"
namespace doris {
diff --git a/be/src/runtime/result_queue_mgr.h
b/be/src/runtime/result_queue_mgr.h
index 088ad787d3..28dcabfaa5 100644
--- a/be/src/runtime/result_queue_mgr.h
+++ b/be/src/runtime/result_queue_mgr.h
@@ -24,7 +24,6 @@
#include "common/status.h"
#include "runtime/primitive_type.h"
-#include "runtime/raw_value.h"
#include "runtime/record_batch_queue.h"
#include "util/hash_util.hpp"
diff --git a/be/src/runtime/result_writer.h b/be/src/runtime/result_writer.h
index 1d0bfde589..bd5a11cc8f 100644
--- a/be/src/runtime/result_writer.h
+++ b/be/src/runtime/result_writer.h
@@ -24,7 +24,6 @@ namespace doris {
class Status;
class RuntimeState;
-struct TypeDescriptor;
namespace vectorized {
class Block;
diff --git a/be/src/runtime/struct_value.h b/be/src/runtime/struct_value.h
index ec243d729c..d02ddc994c 100644
--- a/be/src/runtime/struct_value.h
+++ b/be/src/runtime/struct_value.h
@@ -45,8 +45,6 @@ public:
void shallow_copy(const StructValue* other);
- // size_t get_byte_size(const TypeDescriptor& type) const;
-
const void** values() const { return const_cast<const void**>(_values); }
void** mutable_values() { return _values; }
void set_values(void** values) { _values = values; }
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 71ff0e089f..23e14e3b4c 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -23,7 +23,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/util")
set(UTIL_FILES
arrow/row_batch.cpp
- arrow/row_block.cpp
arrow/utils.cpp
arrow/block_convertor.cpp
bfd_parser.cpp
diff --git a/be/src/util/arrow/row_block.cpp b/be/src/util/arrow/row_block.cpp
deleted file mode 100644
index a7758707fb..0000000000
--- a/be/src/util/arrow/row_block.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "util/arrow/row_block.h"
-
-#include <arrow/array/builder_primitive.h>
-#include <arrow/memory_pool.h>
-#include <arrow/pretty_print.h>
-#include <arrow/record_batch.h>
-#include <arrow/type.h>
-#include <arrow/type_fwd.h>
-#include <arrow/visit_array_inline.h>
-#include <arrow/visit_type_inline.h>
-
-#include "gutil/strings/substitute.h"
-#include "olap/field.h"
-#include "olap/olap_common.h"
-#include "olap/schema.h"
-#include "olap/tablet_schema.h"
-#include "util/arrow/utils.h"
-
-namespace doris {
-
-using strings::Substitute;
-
-Status convert_to_arrow_type(FieldType type, std::shared_ptr<arrow::DataType>*
result) {
- switch (type) {
- case OLAP_FIELD_TYPE_TINYINT:
- *result = arrow::int8();
- break;
- case OLAP_FIELD_TYPE_SMALLINT:
- *result = arrow::int16();
- break;
- case OLAP_FIELD_TYPE_INT:
- *result = arrow::int32();
- break;
- case OLAP_FIELD_TYPE_BIGINT:
- *result = arrow::int64();
- break;
- case OLAP_FIELD_TYPE_FLOAT:
- *result = arrow::float32();
- break;
- case OLAP_FIELD_TYPE_DOUBLE:
- *result = arrow::float64();
- break;
- default:
- return Status::InvalidArgument("Unknown FieldType({})", type);
- }
- return Status::OK();
-}
-
-Status convert_to_arrow_field(uint32_t cid, const Field* field,
- std::shared_ptr<arrow::Field>* result) {
- std::shared_ptr<arrow::DataType> type;
- RETURN_IF_ERROR(convert_to_arrow_type(field->type(), &type));
- *result = arrow::field(strings::Substitute("Col$0", cid), type,
field->is_nullable());
- return Status::OK();
-}
-
-Status convert_to_arrow_schema(const Schema& schema,
std::shared_ptr<arrow::Schema>* result) {
- std::vector<std::shared_ptr<arrow::Field>> fields;
- size_t num_fields = schema.num_column_ids();
- fields.resize(num_fields);
- for (int i = 0; i < num_fields; ++i) {
- auto cid = schema.column_ids()[i];
- RETURN_IF_ERROR(convert_to_arrow_field(cid, schema.column(cid),
&fields[i]));
- }
- *result = arrow::schema(std::move(fields));
- return Status::OK();
-}
-
-Status convert_to_type_name(const arrow::DataType& type, std::string* name) {
- switch (type.id()) {
- case arrow::Type::INT8:
- *name = "TINYINT";
- break;
- case arrow::Type::INT16:
- *name = "SMALLINT";
- break;
- case arrow::Type::INT32:
- *name = "INT";
- break;
- case arrow::Type::INT64:
- *name = "BIGINT";
- break;
- case arrow::Type::FLOAT:
- *name = "FLOAT";
- break;
- case arrow::Type::DOUBLE:
- *name = "DOUBLE";
- break;
- default:
- return Status::InvalidArgument("Unknown arrow type id({})", type.id());
- }
- return Status::OK();
-}
-
-Status convert_to_tablet_column(const arrow::Field& field, int32_t cid,
TabletColumn* output) {
- ColumnPB column_pb;
- std::string type_name;
- RETURN_IF_ERROR(convert_to_type_name(*field.type(), &type_name));
-
- column_pb.set_unique_id(cid);
- column_pb.set_name(field.name());
- column_pb.set_type(type_name);
- column_pb.set_is_key(true);
- column_pb.set_is_nullable(field.nullable());
-
- output->init_from_pb(column_pb);
- return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/util/arrow/row_block.h b/be/src/util/arrow/row_block.h
deleted file mode 100644
index afdecdd52a..0000000000
--- a/be/src/util/arrow/row_block.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-
-#include "common/status.h"
-
-namespace arrow {
-
-class Schema;
-class MemoryPool;
-class RecordBatch;
-
-} // namespace arrow
-
-namespace doris {
-class Schema;
-
-// Convert Doris Schema to Arrow Schema.
-Status convert_to_arrow_schema(const Schema& row_desc,
std::shared_ptr<arrow::Schema>* result);
-
-} // namespace doris
diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp
index 726ad50f92..ecc0f04a19 100644
--- a/be/src/util/hash_util.hpp
+++ b/be/src/util/hash_util.hpp
@@ -37,6 +37,7 @@
#include <zlib.h>
#include "gen_cpp/Types_types.h"
+#include "runtime/define_primitive_type.h"
#include "util/cpu_info.h"
#include "util/murmur_hash3.h"
diff --git a/be/src/vec/columns/column_const.cpp
b/be/src/vec/columns/column_const.cpp
index 8d79ec32f9..7b7fc4d753 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -131,8 +131,7 @@ void
ColumnConst::update_crcs_with_value(std::vector<uint64_t>& hashes, doris::P
}
} else {
for (int i = 0; i < hashes.size(); ++i) {
- hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size,
TypeDescriptor {type},
- hashes[i]);
+ hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size,
type, hashes[i]);
}
}
}
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index a5534f8075..ebd7f3726d 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -825,17 +825,6 @@ Status Block::serialize(int be_exec_version, PBlock*
pblock,
return Status::OK();
}
-inline bool Block::is_column_data_null(const doris::TypeDescriptor& type_desc,
- const StringRef& data_ref, const
IColumn* column, int row) {
- if (type_desc.type != TYPE_ARRAY) {
- return data_ref.data == nullptr;
- } else {
- Field array;
- column->get(row, array);
- return array.is_null();
- }
-}
-
MutableBlock::MutableBlock(const std::vector<TupleDescriptor*>& tuple_descs,
int reserve_size,
bool ignore_trivial_slot) {
for (auto tuple_desc : tuple_descs) {
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 35026b846d..eef530659c 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -42,7 +42,6 @@ namespace doris {
class RowDescriptor;
class Status;
class TupleDescriptor;
-struct TypeDescriptor;
namespace vectorized {
@@ -371,8 +370,6 @@ public:
private:
void erase_impl(size_t position);
- bool is_column_data_null(const doris::TypeDescriptor& type_desc, const
StringRef& data_ref,
- const IColumn* column_with_type_and_name, int
row);
};
using Blocks = std::vector<Block>;
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp
b/be/src/vec/exec/scan/vfile_scanner.cpp
index c9cade7ec8..7ab2f5bc50 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -28,7 +28,6 @@
#include "io/cache/block/block_file_cache_profile.h"
#include "olap/iterators.h"
#include "runtime/descriptors.h"
-#include "runtime/raw_value.h"
#include "runtime/runtime_state.h"
#include "vec/exec/format/csv/csv_reader.h"
#include "vec/exec/format/json/new_json_reader.h"
diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp
b/be/src/vec/runtime/vdata_stream_mgr.cpp
index 282d641431..09c9cd77c7 100644
--- a/be/src/vec/runtime/vdata_stream_mgr.cpp
+++ b/be/src/vec/runtime/vdata_stream_mgr.cpp
@@ -20,7 +20,6 @@
#include "gen_cpp/internal_service.pb.h"
#include "runtime/descriptors.h"
#include "runtime/primitive_type.h"
-#include "runtime/raw_value.h"
#include "runtime/runtime_state.h"
#include "util/doris_metrics.h"
#include "util/runtime_profile.h"
@@ -39,9 +38,9 @@ VDataStreamMgr::~VDataStreamMgr() {
inline uint32_t VDataStreamMgr::get_hash_value(const TUniqueId&
fragment_instance_id,
PlanNodeId node_id) {
- uint32_t value = RawValue::get_hash_value(&fragment_instance_id.lo,
TYPE_BIGINT, 0);
- value = RawValue::get_hash_value(&fragment_instance_id.hi, TYPE_BIGINT,
value);
- value = RawValue::get_hash_value(&node_id, TYPE_INT, value);
+ uint32_t value = HashUtil::hash(&fragment_instance_id.lo, 8, 0);
+ value = HashUtil::hash(&fragment_instance_id.hi, 8, value);
+ value = HashUtil::hash(&node_id, 4, value);
return value;
}
diff --git a/be/src/vec/utils/arrow_column_to_doris_column.cpp
b/be/src/vec/utils/arrow_column_to_doris_column.cpp
index 35d5d4956a..c0d64df211 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.cpp
+++ b/be/src/vec/utils/arrow_column_to_doris_column.cpp
@@ -408,59 +408,4 @@ Status arrow_column_to_doris_column(const arrow::Array*
arrow_column, size_t arr
fmt::format("Not support arrow type:{}",
arrow_column->type()->name()));
}
-Status arrow_type_to_doris_type(arrow::Type::type type, TypeDescriptor*
return_type) {
- switch (type) {
- case arrow::Type::STRING:
- case arrow::Type::BINARY:
- case arrow::Type::FIXED_SIZE_BINARY:
- return_type->type = TYPE_STRING;
- break;
- case arrow::Type::INT8:
- return_type->type = TYPE_TINYINT;
- break;
- case arrow::Type::UINT8:
- case arrow::Type::INT16:
- return_type->type = TYPE_SMALLINT;
- break;
- case arrow::Type::UINT16:
- case arrow::Type::INT32:
- return_type->type = TYPE_INT;
- break;
- case arrow::Type::UINT32:
- case arrow::Type::INT64:
- return_type->type = TYPE_BIGINT;
- break;
- case arrow::Type::UINT64:
- return_type->type = TYPE_LARGEINT;
- break;
- case arrow::Type::HALF_FLOAT:
- case arrow::Type::FLOAT:
- return_type->type = TYPE_FLOAT;
- break;
- case arrow::Type::DOUBLE:
- return_type->type = TYPE_DOUBLE;
- break;
- case arrow::Type::BOOL:
- return_type->type = TYPE_BOOLEAN;
- break;
- case arrow::Type::DATE32:
- return_type->type = TYPE_DATEV2;
- break;
- case arrow::Type::DATE64:
- return_type->type = TYPE_DATETIMEV2;
- break;
- case arrow::Type::TIMESTAMP:
- return_type->type = TYPE_BIGINT;
- break;
- case arrow::Type::DECIMAL:
- return_type->type = TYPE_DECIMALV2;
- return_type->precision = 27;
- return_type->scale = 9;
- break;
- default:
- return Status::InternalError("unsupport type: {}", type);
- }
- return Status::OK();
-}
-
} // namespace doris::vectorized
diff --git a/be/src/vec/utils/arrow_column_to_doris_column.h
b/be/src/vec/utils/arrow_column_to_doris_column.h
index 84e26a7011..73704c95f5 100644
--- a/be/src/vec/utils/arrow_column_to_doris_column.h
+++ b/be/src/vec/utils/arrow_column_to_doris_column.h
@@ -42,6 +42,4 @@ Status arrow_column_to_doris_column(const arrow::Array*
arrow_column, size_t arr
ColumnPtr& doris_column, const
DataTypePtr& type,
size_t num_elements, const
cctz::time_zone& ctz);
-Status arrow_type_to_doris_type(arrow::Type::type type, TypeDescriptor*
return_type);
-
} // namespace doris::vectorized
diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp
index 5899f557f3..9ee6ae7943 100644
--- a/be/test/olap/row_cursor_test.cpp
+++ b/be/test/olap/row_cursor_test.cpp
@@ -286,8 +286,6 @@ TEST_F(TestRowCursor, InitRowCursorWithColumnCount) {
EXPECT_EQ(res, Status::OK());
EXPECT_EQ(row.get_fixed_len(), 23);
EXPECT_EQ(row.get_variable_len(), 0);
- row.allocate_memory_for_string_type(tablet_schema);
- EXPECT_EQ(row.get_variable_len(), 0);
}
TEST_F(TestRowCursor, InitRowCursorWithColIds) {
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index 9426505489..484d5adbd6 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -26,6 +26,7 @@
#include "io/buffered_reader.h"
#include "io/fs/local_file_system.h"
#include "olap/iterators.h"
+#include "runtime/descriptors.h"
#include "util/runtime_profile.h"
#include "util/timezone_utils.h"
#include "vec/common/string_ref.h"
@@ -226,6 +227,74 @@ static Status get_column_values(io::FileReaderSPtr
file_reader, tparquet::Column
}
}
+// Only the unit test depend on this, but it is wrong, should not use
TTupleDesc to create tuple desc, not
+// use columndesc
+static doris::TupleDescriptor* create_tuple_desc(
+ doris::ObjectPool* pool,
std::vector<doris::SchemaScanner::ColumnDesc>& column_descs) {
+ using namespace doris;
+ int null_column = 0;
+ for (int i = 0; i < column_descs.size(); ++i) {
+ if (column_descs[i].is_null) {
+ null_column++;
+ }
+ }
+
+ int offset = (null_column + 7) / 8;
+ std::vector<SlotDescriptor*> slots;
+ int null_byte = 0;
+ int null_bit = 0;
+
+ for (int i = 0; i < column_descs.size(); ++i) {
+ TSlotDescriptor t_slot_desc;
+ if (column_descs[i].type == TYPE_DECIMALV2) {
+
t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27,
9).to_thrift());
+ } else {
+ TypeDescriptor descriptor(column_descs[i].type);
+ if (column_descs[i].precision >= 0 && column_descs[i].scale >= 0) {
+ descriptor.precision = column_descs[i].precision;
+ descriptor.scale = column_descs[i].scale;
+ }
+ t_slot_desc.__set_slotType(descriptor.to_thrift());
+ }
+ t_slot_desc.__set_colName(column_descs[i].name);
+ t_slot_desc.__set_columnPos(i);
+ t_slot_desc.__set_byteOffset(offset);
+
+ if (column_descs[i].is_null) {
+ t_slot_desc.__set_nullIndicatorByte(null_byte);
+ t_slot_desc.__set_nullIndicatorBit(null_bit);
+ null_bit = (null_bit + 1) % 8;
+
+ if (0 == null_bit) {
+ null_byte++;
+ }
+ } else {
+ t_slot_desc.__set_nullIndicatorByte(0);
+ t_slot_desc.__set_nullIndicatorBit(-1);
+ }
+
+ t_slot_desc.id = i;
+ t_slot_desc.__set_slotIdx(i);
+ t_slot_desc.__set_isMaterialized(true);
+
+ SlotDescriptor* slot = pool->add(new (std::nothrow)
SlotDescriptor(t_slot_desc));
+ slots.push_back(slot);
+ offset += column_descs[i].size;
+ }
+
+ TTupleDescriptor t_tuple_desc;
+ t_tuple_desc.__set_byteSize(offset);
+ t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);
+ doris::TupleDescriptor* tuple_desc =
+ pool->add(new (std::nothrow) doris::TupleDescriptor(t_tuple_desc));
+
+ for (int i = 0; i < slots.size(); ++i) {
+ tuple_desc->add_slot(slots[i]);
+ }
+
+ return tuple_desc;
+}
+
static void create_block(std::unique_ptr<vectorized::Block>& block) {
// Current supported column type:
std::vector<SchemaScanner::ColumnDesc> column_descs = {
@@ -247,11 +316,9 @@ static void
create_block(std::unique_ptr<vectorized::Block>& block) {
{"date_col", TYPE_DATE, sizeof(DateTimeValue), true},
{"date_v2_col", TYPE_DATEV2, sizeof(uint32_t), true},
{"timestamp_v2_col", TYPE_DATETIMEV2, sizeof(DateTimeValue), true,
18, 0}};
- SchemaScanner schema_scanner(column_descs);
ObjectPool object_pool;
- SchemaScannerParam param;
- schema_scanner.init(&param, &object_pool);
- auto tuple_slots =
const_cast<TupleDescriptor*>(schema_scanner.tuple_desc())->slots();
+ doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool,
column_descs);
+ auto tuple_slots = tuple_desc->slots();
block.reset(new vectorized::Block());
for (const auto& slot_desc : tuple_slots) {
auto data_type = slot_desc->get_data_type_ptr();
@@ -347,7 +414,7 @@ TEST_F(ParquetThriftReaderTest, dict_decoder) {
}
TEST_F(ParquetThriftReaderTest, group_reader) {
- std::vector<SchemaScanner::ColumnDesc> column_descs = {
+ std::vector<doris::SchemaScanner::ColumnDesc> column_descs = {
{"tinyint_col", TYPE_TINYINT, sizeof(int8_t), true},
{"smallint_col", TYPE_SMALLINT, sizeof(int16_t), true},
{"int_col", TYPE_INT, sizeof(int32_t), true},
@@ -362,11 +429,9 @@ TEST_F(ParquetThriftReaderTest, group_reader) {
{"char_col", TYPE_CHAR, sizeof(StringRef), true},
{"varchar_col", TYPE_VARCHAR, sizeof(StringRef), true},
{"date_col", TYPE_DATE, sizeof(DateTimeValue), true}};
- SchemaScanner schema_scanner(column_descs);
ObjectPool object_pool;
- SchemaScannerParam param;
- schema_scanner.init(&param, &object_pool);
- auto tuple_slots =
const_cast<TupleDescriptor*>(schema_scanner.tuple_desc())->slots();
+ doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool,
column_descs);
+ auto tuple_slots = tuple_desc->slots();
TSlotDescriptor tslot_desc;
{
diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp
index 0e91ba42e3..45b7ca5025 100644
--- a/be/test/vec/exprs/vexpr_test.cpp
+++ b/be/test/vec/exprs/vexpr_test.cpp
@@ -26,6 +26,7 @@
#include "exec/schema_scanner.h"
#include "gen_cpp/Exprs_types.h"
#include "gen_cpp/Types_types.h"
+#include "runtime/descriptors.h"
#include "runtime/exec_env.h"
#include "runtime/large_int_value.h"
#include "runtime/memory/chunk_allocator.h"
@@ -63,15 +64,80 @@ TEST(TEST_VEXPR, ABSTEST) {
context->close(&runtime_stat);
}
+// Only the unit test depend on this, but it is wrong, should not use
TTupleDesc to create tuple desc, not
+// use columndesc
+static doris::TupleDescriptor* create_tuple_desc(
+ doris::ObjectPool* pool,
std::vector<doris::SchemaScanner::ColumnDesc>& column_descs) {
+ using namespace doris;
+ int null_column = 0;
+ for (int i = 0; i < column_descs.size(); ++i) {
+ if (column_descs[i].is_null) {
+ null_column++;
+ }
+ }
+
+ int offset = (null_column + 7) / 8;
+ std::vector<SlotDescriptor*> slots;
+ int null_byte = 0;
+ int null_bit = 0;
+
+ for (int i = 0; i < column_descs.size(); ++i) {
+ TSlotDescriptor t_slot_desc;
+ if (column_descs[i].type == TYPE_DECIMALV2) {
+
t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27,
9).to_thrift());
+ } else {
+ TypeDescriptor descriptor(column_descs[i].type);
+ if (column_descs[i].precision >= 0 && column_descs[i].scale >= 0) {
+ descriptor.precision = column_descs[i].precision;
+ descriptor.scale = column_descs[i].scale;
+ }
+ t_slot_desc.__set_slotType(descriptor.to_thrift());
+ }
+ t_slot_desc.__set_colName(column_descs[i].name);
+ t_slot_desc.__set_columnPos(i);
+ t_slot_desc.__set_byteOffset(offset);
+
+ if (column_descs[i].is_null) {
+ t_slot_desc.__set_nullIndicatorByte(null_byte);
+ t_slot_desc.__set_nullIndicatorBit(null_bit);
+ null_bit = (null_bit + 1) % 8;
+
+ if (0 == null_bit) {
+ null_byte++;
+ }
+ } else {
+ t_slot_desc.__set_nullIndicatorByte(0);
+ t_slot_desc.__set_nullIndicatorBit(-1);
+ }
+
+ t_slot_desc.id = i;
+ t_slot_desc.__set_slotIdx(i);
+ t_slot_desc.__set_isMaterialized(true);
+
+ SlotDescriptor* slot = pool->add(new (std::nothrow)
SlotDescriptor(t_slot_desc));
+ slots.push_back(slot);
+ offset += column_descs[i].size;
+ }
+
+ TTupleDescriptor t_tuple_desc;
+ t_tuple_desc.__set_byteSize(offset);
+ t_tuple_desc.__set_numNullBytes((null_byte * 8 + null_bit + 7) / 8);
+ doris::TupleDescriptor* tuple_desc =
+ pool->add(new (std::nothrow) doris::TupleDescriptor(t_tuple_desc));
+
+ for (int i = 0; i < slots.size(); ++i) {
+ tuple_desc->add_slot(slots[i]);
+ }
+
+ return tuple_desc;
+}
+
TEST(TEST_VEXPR, ABSTEST2) {
using namespace doris;
- std::vector<SchemaScanner::ColumnDesc> column_descs = {
+ std::vector<doris::SchemaScanner::ColumnDesc> column_descs = {
{"k1", TYPE_INT, sizeof(int32_t), false}};
- SchemaScanner schema_scanner(column_descs);
ObjectPool object_pool;
- SchemaScannerParam param;
- schema_scanner.init(&param, &object_pool);
- auto tuple_desc =
const_cast<TupleDescriptor*>(schema_scanner.tuple_desc());
+ doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool,
column_descs);
RowDescriptor row_desc(tuple_desc, false);
std::string expr_json =
R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_ud
[...]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]