This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 53ba46e404 [Fix][Refactor] Fix "not member call on null pointer of
type 'doris::TextConverter'" error in ubsan env and refactor text converter.
(#19849)
53ba46e404 is described below
commit 53ba46e40419cb2c2146b30ccf4a85fe4a5a5c05
Author: Qi Chen <[email protected]>
AuthorDate: Mon May 22 21:00:19 2023 +0800
[Fix][Refactor] Fix "not member call on null pointer of type
'doris::TextConverter'" error in ubsan env and refactor text converter. (#19849)
Fix the "member call on null pointer of type 'doris::TextConverter'" error
reported in the UBSan environment, and refactor the text converter.
---
be/src/exec/text_converter.cpp | 16 ++
be/src/exec/text_converter.h | 18 +-
be/src/exec/text_converter.hpp | 254 ---------------------
be/src/vec/exec/format/csv/csv_reader.cpp | 1 -
be/src/vec/exec/format/orc/vorc_reader.cpp | 1 +
.../exec/format/parquet/vparquet_group_reader.cpp | 1 +
be/src/vec/exec/scan/new_odbc_scanner.cpp | 1 -
be/src/vec/exec/vmysql_scan_node.cpp | 1 -
8 files changed, 31 insertions(+), 262 deletions(-)
diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp
index 194dde13e6..02893c1dfc 100644
--- a/be/src/exec/text_converter.cpp
+++ b/be/src/exec/text_converter.cpp
@@ -44,6 +44,22 @@ namespace doris {
TextConverter::TextConverter(char escape_char) : _escape_char(escape_char) {}
+void TextConverter::write_string_column(const SlotDescriptor* slot_desc,
+ vectorized::MutableColumnPtr*
column_ptr, const char* data,
+ size_t len) {
+ DCHECK(column_ptr->get()->is_nullable());
+ auto* nullable_column =
reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
+ if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len ==
SQL_NULL_DATA) {
+ nullable_column->get_null_map_data().push_back(1);
+
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
+ .insert_default();
+ } else {
+ nullable_column->get_null_map_data().push_back(0);
+
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
+ .insert_data(data, len);
+ }
+}
+
bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
vectorized::IColumn* nullable_col_ptr,
const char* data,
size_t len, bool copy_string, bool
need_escape, size_t rows) {
diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h
index d70cfba982..9615471a8f 100644
--- a/be/src/exec/text_converter.h
+++ b/be/src/exec/text_converter.h
@@ -35,11 +35,19 @@ public:
vectorized::MutableColumnPtr* column_ptr, const
char* data,
size_t len);
- bool write_column(const SlotDescriptor* slot_desc,
vectorized::MutableColumnPtr* column_ptr,
- const char* data, size_t len, bool copy_string, bool
need_escape);
-
- bool write_vec_column(const SlotDescriptor* slot_desc,
vectorized::IColumn* nullable_col_ptr,
- const char* data, size_t len, bool copy_string, bool
need_escape);
+ inline bool write_column(const SlotDescriptor* slot_desc,
+ vectorized::MutableColumnPtr* column_ptr, const
char* data, size_t len,
+ bool copy_string, bool need_escape) {
+ vectorized::IColumn* nullable_col_ptr = column_ptr->get();
+ return write_vec_column(slot_desc, nullable_col_ptr, data, len,
copy_string, need_escape);
+ }
+
+ inline bool write_vec_column(const SlotDescriptor* slot_desc,
+ vectorized::IColumn* nullable_col_ptr, const
char* data,
+ size_t len, bool copy_string, bool
need_escape) {
+ return write_vec_column(slot_desc, nullable_col_ptr, data, len,
copy_string, need_escape,
+ 1);
+ }
/// Write consecutive rows of the same data.
bool write_vec_column(const SlotDescriptor* slot_desc,
vectorized::IColumn* nullable_col_ptr,
diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp
deleted file mode 100644
index ddef2dd42e..0000000000
--- a/be/src/exec/text_converter.hpp
+++ /dev/null
@@ -1,254 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <sql.h>
-
-#include <boost/algorithm/string.hpp>
-
-#include "runtime/datetime_value.h"
-#include "runtime/decimalv2_value.h"
-#include "runtime/descriptors.h"
-#include "text_converter.h"
-#include "util/binary_cast.hpp"
-#include "util/string_parser.hpp"
-#include "util/types.h"
-#include "vec/columns/column_complex.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/runtime/vdatetime_value.h"
-
-namespace doris {
-
-inline void TextConverter::write_string_column(const SlotDescriptor* slot_desc,
- vectorized::MutableColumnPtr*
column_ptr,
- const char* data, size_t len) {
- DCHECK(column_ptr->get()->is_nullable());
- auto* nullable_column =
reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
- if (len == 2 && data[0] == '\\' && data[1] == 'N') {
- nullable_column->get_null_map_data().push_back(1);
-
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
- .insert_default();
- } else {
- nullable_column->get_null_map_data().push_back(0);
-
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
- .insert_data(data, len);
- }
-}
-
-inline bool TextConverter::write_column(const SlotDescriptor* slot_desc,
- vectorized::MutableColumnPtr*
column_ptr, const char* data,
- size_t len, bool copy_string, bool
need_escape) {
- vectorized::IColumn* nullable_col_ptr = column_ptr->get();
- return write_vec_column(slot_desc, nullable_col_ptr, data, len,
copy_string, need_escape);
-}
-
-inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
- vectorized::IColumn*
nullable_col_ptr, const char* data,
- size_t len, bool copy_string, bool
need_escape) {
- vectorized::IColumn* col_ptr = nullable_col_ptr;
- // \N means it's NULL
- if (slot_desc->is_nullable()) {
- auto* nullable_column =
reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
- if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len ==
SQL_NULL_DATA) {
- nullable_column->insert_data(nullptr, 0);
- return true;
- } else {
- nullable_column->get_null_map_data().push_back(0);
- col_ptr = &nullable_column->get_nested_column();
- }
- }
-
- bool insert_after_parse_failure = true;
- StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
- // Parse the raw-text data. Translate the text string to internal format.
- switch (slot_desc->type().type) {
- case TYPE_HLL: {
-
reinterpret_cast<vectorized::ColumnHLL*>(col_ptr)->get_data().emplace_back(
- HyperLogLog(Slice(data, len)));
- break;
- }
- case TYPE_STRING:
- case TYPE_VARCHAR:
- case TYPE_CHAR: {
- if (need_escape) {
- unescape_string_on_spot(data, &len);
- }
-
reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(data, len);
- break;
- }
-
- case TYPE_BOOLEAN: {
- bool num = StringParser::string_to_bool(data, len, &parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::UInt8>*>(col_ptr)->insert_value(
- (uint8_t)num);
- break;
- }
- case TYPE_TINYINT: {
- int8_t num = StringParser::string_to_int<int8_t>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int8>*>(col_ptr)->insert_value(num);
- break;
- }
- case TYPE_SMALLINT: {
- int16_t num = StringParser::string_to_int<int16_t>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int16>*>(col_ptr)->insert_value(num);
- break;
- }
- case TYPE_INT: {
- int32_t num = StringParser::string_to_int<int32_t>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(num);
- break;
- }
- case TYPE_BIGINT: {
- int64_t num = StringParser::string_to_int<int64_t>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
- break;
- }
- case TYPE_LARGEINT: {
- __int128 num = StringParser::string_to_int<__int128>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
- break;
- }
-
- case TYPE_FLOAT: {
- float num = StringParser::string_to_float<float>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Float32>*>(col_ptr)->insert_value(
- num);
- break;
- }
- case TYPE_DOUBLE: {
- double num = StringParser::string_to_float<double>(data, len,
&parse_result);
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Float64>*>(col_ptr)->insert_value(
- num);
- break;
- }
- case TYPE_DATE: {
- vectorized::VecDateTimeValue ts_slot;
- if (!ts_slot.from_date_str(data, len)) {
- parse_result = StringParser::PARSE_FAILURE;
- insert_after_parse_failure = false;
- break;
- }
- ts_slot.cast_to_date();
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
- reinterpret_cast<char*>(&ts_slot), 0);
- break;
- }
- case TYPE_DATEV2: {
- vectorized::VecDateTimeValue ts_slot;
- if (!ts_slot.from_date_str(data, len)) {
- parse_result = StringParser::PARSE_FAILURE;
- insert_after_parse_failure = false;
- break;
- }
- ts_slot.cast_to_date();
- uint32_t num = ts_slot.to_date_v2();
-
reinterpret_cast<vectorized::ColumnVector<vectorized::UInt32>*>(col_ptr)->insert_value(num);
- break;
- }
- case TYPE_DATETIME: {
- vectorized::VecDateTimeValue ts_slot;
- if (!ts_slot.from_date_str(data, len)) {
- parse_result = StringParser::PARSE_FAILURE;
- insert_after_parse_failure = false;
- break;
- }
- ts_slot.to_datetime();
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
- reinterpret_cast<char*>(&ts_slot), 0);
- break;
- }
- case TYPE_DATETIMEV2: {
- vectorized::DateV2Value<vectorized::DateTimeV2ValueType> ts_slot;
- if (!ts_slot.from_date_str(data, len, slot_desc->type().scale)) {
- parse_result = StringParser::PARSE_FAILURE;
- insert_after_parse_failure = false;
- break;
- }
- uint64_t num = ts_slot.to_date_int_val();
-
reinterpret_cast<vectorized::ColumnVector<vectorized::UInt64>*>(col_ptr)->insert_value(num);
- break;
- }
-
- case TYPE_DECIMALV2: {
- DecimalV2Value decimal_slot;
- if (decimal_slot.parse_from_str(data, len)) {
- parse_result = StringParser::PARSE_FAILURE;
- insert_after_parse_failure = false;
- break;
- }
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(
- decimal_slot.value());
- break;
- }
- case TYPE_DECIMAL32: {
- StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
- int32_t value = StringParser::string_to_decimal<int32_t>(
- data, len, slot_desc->type().precision,
slot_desc->type().scale, &result);
- if (result != StringParser::PARSE_SUCCESS) {
- parse_result = StringParser::PARSE_FAILURE;
- break;
- }
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(
- value);
- break;
- }
- case TYPE_DECIMAL64: {
- StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
- int64_t value = StringParser::string_to_decimal<int64_t>(
- data, len, slot_desc->type().precision,
slot_desc->type().scale, &result);
- if (result != StringParser::PARSE_SUCCESS) {
- parse_result = StringParser::PARSE_FAILURE;
- break;
- }
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(
- value);
- break;
- }
- case TYPE_DECIMAL128I: {
- StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
- vectorized::Int128 value =
StringParser::string_to_decimal<vectorized::Int128>(
- data, len, slot_desc->type().precision,
slot_desc->type().scale, &result);
- if (result != StringParser::PARSE_SUCCESS) {
- parse_result = StringParser::PARSE_FAILURE;
- break;
- }
-
reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(
- value);
- break;
- }
- default:
- DCHECK(false) << "bad slot type: " << slot_desc->type();
- break;
- }
-
- if (UNLIKELY(parse_result == StringParser::PARSE_FAILURE)) {
- if (slot_desc->is_nullable()) {
- auto* nullable_column =
reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
- size_t size = nullable_column->get_null_map_data().size();
- doris::vectorized::NullMap& null_map_data =
nullable_column->get_null_map_data();
- null_map_data[size - 1] = 1;
- if (!insert_after_parse_failure) {
- nullable_column->get_nested_column().insert_default();
- }
- }
- return false;
- }
- return true;
-}
-
-} // namespace doris
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 7cb36e94c7..09de2c6479 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -36,7 +36,6 @@
#include "exec/decompressor.h"
#include "exec/line_reader.h"
#include "exec/text_converter.h"
-#include "exec/text_converter.hpp"
#include "io/file_factory.h"
#include "io/fs/broker_file_reader.h"
#include "io/fs/buffered_reader.h"
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index eb944bb84c..990dfc0477 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -244,6 +244,7 @@ Status OrcReader::init_reader(
VExprContext* vconjunct_ctx) {
_colname_to_value_range = colname_to_value_range;
_lazy_read_ctx.vconjunct_ctx = vconjunct_ctx;
+ _text_converter.reset(new TextConverter('\\'));
SCOPED_RAW_TIMER(&_statistics.parse_meta_time);
RETURN_IF_ERROR(_create_file_reader());
RETURN_IF_ERROR(_init_read_columns());
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 82c9b122a1..abda93afde 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -116,6 +116,7 @@ Status RowGroupReader::init(
_row_descriptor = row_descriptor;
_col_name_to_slot_id = colname_to_slot_id;
_slot_id_to_filter_conjuncts = slot_id_to_filter_conjuncts;
+ _text_converter.reset(new TextConverter('\\'));
if (not_single_slot_filter_conjuncts) {
_filter_conjuncts.insert(_filter_conjuncts.end(),
not_single_slot_filter_conjuncts->begin(),
not_single_slot_filter_conjuncts->end());
diff --git a/be/src/vec/exec/scan/new_odbc_scanner.cpp
b/be/src/vec/exec/scan/new_odbc_scanner.cpp
index 8494973f08..1022be3c83 100644
--- a/be/src/vec/exec/scan/new_odbc_scanner.cpp
+++ b/be/src/vec/exec/scan/new_odbc_scanner.cpp
@@ -28,7 +28,6 @@
#include "common/logging.h"
#include "common/status.h"
-#include "exec/text_converter.hpp"
#include "runtime/descriptors.h"
#include "runtime/runtime_state.h"
#include "runtime/types.h"
diff --git a/be/src/vec/exec/vmysql_scan_node.cpp
b/be/src/vec/exec/vmysql_scan_node.cpp
index 8673861f66..1d620141b9 100644
--- a/be/src/vec/exec/vmysql_scan_node.cpp
+++ b/be/src/vec/exec/vmysql_scan_node.cpp
@@ -20,7 +20,6 @@
#include <gen_cpp/PlanNodes_types.h>
#include "exec/text_converter.h"
-#include "exec/text_converter.hpp"
#include "runtime/runtime_state.h"
#include "util/runtime_profile.h"
#include "util/types.h"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]