This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 8cb910f6ac8 [pick](4.0) Use ColumnView to automatically handle
Nullable and Const, Fix array_join to handle non-constant columns. (#60707)
8cb910f6ac8 is described below
commit 8cb910f6ac8c0eae5d3308dfd1da794b45bbccac
Author: Mryange <[email protected]>
AuthorDate: Thu Feb 12 23:43:37 2026 +0800
[pick](4.0) Use ColumnView to automatically handle Nullable and Const, Fix
array_join to handle non-constant columns. (#60707)
### What problem does this PR solve?
https://github.com/apache/doris/pull/60612
https://github.com/apache/doris/pull/60575
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/vec/columns/column_execute_util.h | 101 +++++++
be/src/vec/columns/column_string.cpp | 14 -
be/src/vec/columns/column_string.h | 14 +-
be/src/vec/functions/array/function_array_join.h | 126 ++++----
be/src/vec/functions/functions_geo.cpp | 320 ++++++---------------
be/test/vec/columns/column_execute_util_test.cpp | 164 +++++++++++
.../array_functions/test_array_functions.out | 10 +
.../array_functions/test_array_functions.groovy | 36 +++
8 files changed, 479 insertions(+), 306 deletions(-)
diff --git a/be/src/vec/columns/column_execute_util.h
b/be/src/vec/columns/column_execute_util.h
new file mode 100644
index 00000000000..b7d2bf1f11c
--- /dev/null
+++ b/be/src/vec/columns/column_execute_util.h
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <tuple>
+#include <variant>
+
+#include "runtime/define_primitive_type.h"
+#include "runtime/primitive_type.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+// Utility tools for convenient column execution
+
+// ColumnElementView is used to distinguish between scalar columns and string columns
+template <PrimitiveType PType>
+struct ColumnElementView {
+ using ColumnType = typename PrimitiveTypeTraits<PType>::ColumnType;
+ using ElementType = typename ColumnType::value_type;
+ const typename ColumnType::Container& data;
+ ElementType get_element(size_t idx) const { return data[idx]; }
+
+ ColumnElementView(const IColumn& column)
+ : data(assert_cast<const ColumnType&>(column).get_data()) {}
+};
+
+template <>
+struct ColumnElementView<TYPE_STRING> {
+ using ColumnType = ColumnString;
+ using ElementType = StringRef;
+ const ColumnString& string_column;
+ ColumnElementView(const IColumn& column)
+ : string_column(assert_cast<const ColumnString&>(column)) {}
+ StringRef get_element(size_t idx) const { return
string_column.get_data_at(idx); }
+};
+
+// ColumnView is used to handle the nullable and const properties of a column.
+// For example, a regular ColumnInt32 may appear in the following 4 cases:
+// 1. ColumnInt32
+// 2. Const(ColumnInt32)
+// 3. Nullable(ColumnInt32)
+// 4. Const(Nullable(ColumnInt32)) (although this case is rare, it can still occur; many of our previous code did not consider this)
+// You can use is_null_at and value_at to get the data at the corresponding position
+
+template <PrimitiveType PType>
+struct ColumnView {
+ const ColumnElementView<PType> data;
+ const NullMap* null_map;
+ const bool is_const;
+ const size_t count;
+
+ static ColumnView create(const ColumnPtr& column_ptr) {
+ const auto& [from_data_column, is_const] = unpack_if_const(column_ptr);
+ const NullMap* null_map = nullptr;
+ const IColumn* data = nullptr;
+ if (const auto* nullable_column =
+
check_and_get_column<ColumnNullable>(from_data_column.get())) {
+ null_map = &nullable_column->get_null_map_data();
+ data = nullable_column->get_nested_column_ptr().get();
+ } else {
+ data = from_data_column.get();
+ }
+
+ return ColumnView {.data = ColumnElementView<PType>(*data),
+ .null_map = null_map,
+ .is_const = is_const,
+ .count = column_ptr->size()};
+ }
+
+ bool is_null_at(size_t idx) const {
+ if (null_map != nullptr) {
+ return (*null_map)[is_const ? 0 : idx];
+ }
+ return false;
+ }
+
+ auto value_at(size_t idx) const { return data.get_element(is_const ? 0 :
idx); }
+
+ size_t size() const { return count; }
+};
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 8e93b5d0258..b915096759d 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -53,20 +53,6 @@ void ColumnStr<T>::sanity_check() const {
#endif
}
-template <typename T>
-void ColumnStr<T>::sanity_check_simple() const {
-#ifndef NDEBUG
- auto count = cast_set<int64_t>(offsets.size());
- if (chars.size() != offsets[count - 1]) {
- throw Exception(Status::InternalError("row count: {}, chars.size():
{}, offset[{}]: {}",
- count, chars.size(), count - 1,
offsets[count - 1]));
- }
- if (offsets[-1] != 0) {
- throw Exception(Status::InternalError("wrong offsets[-1]: {}",
offsets[-1]));
- }
-#endif
-}
-
template <typename T>
MutableColumnPtr ColumnStr<T>::clone_resized(size_t to_size) const {
auto res = ColumnStr<T>::create();
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index a45308e0e84..0893978754b 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -115,7 +115,19 @@ public:
bool is_variable_length() const override { return true; }
void sanity_check() const override;
- void sanity_check_simple() const;
+ void sanity_check_simple() const {
+#ifndef NDEBUG
+ auto count = cast_set<int64_t>(offsets.size());
+ if (chars.size() != offsets[count - 1]) {
+ throw Exception(Status::InternalError("row count: {},
chars.size(): {}, offset[{}]: {}",
+ count, chars.size(), count -
1,
+ offsets[count - 1]));
+ }
+ if (offsets[-1] != 0) {
+ throw Exception(Status::InternalError("wrong offsets[-1]: {}",
offsets[-1]));
+ }
+#endif
+ }
std::string get_name() const override { return "String"; }
diff --git a/be/src/vec/functions/array/function_array_join.h
b/be/src/vec/functions/array/function_array_join.h
index 39b9fe1ba1a..2880cc2b2db 100644
--- a/be/src/vec/functions/array/function_array_join.h
+++ b/be/src/vec/functions/array/function_array_join.h
@@ -18,6 +18,9 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
+#include "vec/columns/column_execute_util.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/block.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_string.h"
#include "vec/functions/array/function_array_utils.h"
@@ -65,29 +68,35 @@ public:
block.get_by_position(arguments[0]).type->get_name()));
}
- ColumnPtr sep_column =
-
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
- ColumnPtr null_replace_column =
- (arguments.size() > 2 ? block.get_by_position(arguments[2])
-
.column->convert_to_full_column_if_const()
- : nullptr);
-
- std::string sep_str = _get_string_from_column(sep_column);
- std::string null_replace_str =
_get_string_from_column(null_replace_column);
-
auto nested_type = data_type_array->get_nested_type();
auto dest_column_ptr = ColumnString::create();
- DCHECK(dest_column_ptr);
- auto res_val = _execute_string(*src.nested_col, *src.offsets_ptr,
src.nested_nullmap_data,
- sep_str, null_replace_str,
dest_column_ptr.get());
- if (!res_val) {
- return Status::RuntimeError(fmt::format(
- "execute failed or unsupported types for function
{}({},{},{})", "array_join",
- block.get_by_position(arguments[0]).type->get_name(),
- block.get_by_position(arguments[1]).type->get_name(),
- (arguments.size() > 2 ?
block.get_by_position(arguments[2]).type->get_name()
- : "")));
+ auto& dest_chars = dest_column_ptr->get_chars();
+ auto& dest_offsets = dest_column_ptr->get_offsets();
+
+ dest_offsets.resize_fill(src_column->size(), 0);
+
+ auto sep_column =
+
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[1]).column);
+
+ if (arguments.size() > 2) {
+ auto null_replace_column =
+
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[2]).column);
+
+ _execute_string(*src.nested_col, *src.offsets_ptr,
src.nested_nullmap_data, sep_column,
+ null_replace_column, dest_chars, dest_offsets);
+
+ } else {
+ auto tmp_column_string = ColumnString::create();
+ // insert default value for null replacement, which is empty string
+ tmp_column_string->insert_default();
+ ColumnPtr tmp_const_column =
+ ColumnConst::create(std::move(tmp_column_string),
sep_column.size());
+
+ auto null_replace_column =
ColumnView<TYPE_STRING>::create(tmp_const_column);
+
+ _execute_string(*src.nested_col, *src.offsets_ptr,
src.nested_nullmap_data, sep_column,
+ null_replace_column, dest_chars, dest_offsets);
}
block.replace_by_position(result, std::move(dest_column_ptr));
@@ -95,61 +104,66 @@ public:
}
private:
- static std::string _get_string_from_column(const ColumnPtr& column_ptr) {
- if (!column_ptr) {
- return std::string("");
+ // same as ColumnString::insert_data
+ static void insert_to_chars(int64_t i, ColumnString::Chars& chars,
uint32_t& total_size,
+ const char* pos, size_t length) {
+ const size_t old_size = chars.size();
+ const size_t new_size = old_size + length;
+
+ if (length) {
+ ColumnString::check_chars_length(new_size, i);
+ chars.resize(new_size);
+ memcpy(chars.data() + old_size, pos, length);
+ total_size += length;
}
- const ColumnString* column_string_ptr =
check_and_get_column<ColumnString>(*column_ptr);
- StringRef str_ref = column_string_ptr->get_data_at(0);
- std::string str(str_ref.data, str_ref.size);
- return str;
}
- static void _fill_result_string(const std::string& input_str, const
std::string& sep_str,
- std::string& result_str, bool&
is_first_elem) {
+ static void _fill_result_string(int64_t i, const StringRef& input_str,
const StringRef& sep_str,
+ ColumnString::Chars& dest_chars, uint32_t&
total_size,
+ bool& is_first_elem) {
if (is_first_elem) {
- result_str.append(input_str);
+ insert_to_chars(i, dest_chars, total_size, input_str.data,
input_str.size);
is_first_elem = false;
} else {
- result_str.append(sep_str);
- result_str.append(input_str);
+ insert_to_chars(i, dest_chars, total_size, sep_str.data,
sep_str.size);
+ insert_to_chars(i, dest_chars, total_size, input_str.data,
input_str.size);
}
- return;
}
- static bool _execute_string(const IColumn& src_column,
+ static void _execute_string(const IColumn& src_column,
const ColumnArray::Offsets64& src_offsets,
- const UInt8* src_null_map, const std::string&
sep_str,
- const std::string& null_replace_str,
- ColumnString* dest_column_ptr) {
- const ColumnString* src_data_concrete = assert_cast<const
ColumnString*>(&src_column);
- if (!src_data_concrete) {
- return false;
- }
+ const UInt8* src_null_map,
ColumnView<TYPE_STRING>& sep_column,
+ ColumnView<TYPE_STRING>& null_replace_column,
+ ColumnString::Chars& dest_chars,
+ ColumnString::Offsets& dest_offsets) {
+ const auto& src_data = assert_cast<const ColumnString&>(src_column);
+
+ uint32_t total_size = 0;
+
+ for (int64_t i = 0; i < src_offsets.size(); ++i) {
+ auto begin = src_offsets[i - 1];
+ auto end = src_offsets[i];
+
+ auto sep_str = sep_column.value_at(i);
+ auto null_replace_str = null_replace_column.value_at(i);
- size_t prev_src_offset = 0;
- for (auto curr_src_offset : src_offsets) {
- std::string result_str;
bool is_first_elem = true;
- for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
+
+ for (size_t j = begin; j < end; ++j) {
if (src_null_map && src_null_map[j]) {
- if (null_replace_str.size() == 0) {
- continue;
- } else {
- _fill_result_string(null_replace_str, sep_str,
result_str, is_first_elem);
- continue;
+ if (null_replace_str.size != 0) {
+ _fill_result_string(i, null_replace_str, sep_str,
dest_chars, total_size,
+ is_first_elem);
}
+ continue;
}
- StringRef src_str_ref = src_data_concrete->get_data_at(j);
- std::string elem_str(src_str_ref.data, src_str_ref.size);
- _fill_result_string(elem_str, sep_str, result_str,
is_first_elem);
+ StringRef src_str_ref = src_data.get_data_at(j);
+ _fill_result_string(i, src_str_ref, sep_str, dest_chars,
total_size, is_first_elem);
}
- dest_column_ptr->insert_data(result_str.c_str(),
result_str.size());
- prev_src_offset = curr_src_offset;
+ dest_offsets[i] = total_size;
}
- return true;
}
};
diff --git a/be/src/vec/functions/functions_geo.cpp
b/be/src/vec/functions/functions_geo.cpp
index 01b98957795..c57d8800687 100644
--- a/be/src/vec/functions/functions_geo.cpp
+++ b/be/src/vec/functions/functions_geo.cpp
@@ -25,7 +25,9 @@
#include "geo/geo_common.h"
#include "geo/geo_types.h"
+#include "runtime/define_primitive_type.h"
#include "vec/columns/column.h"
+#include "vec/columns/column_execute_util.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_ref.h"
@@ -46,75 +48,32 @@ struct StPoint {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
- const auto& [left_column, left_const] =
- unpack_if_const(block.get_by_position(arguments[0]).column);
- const auto& [right_column, right_const] =
- unpack_if_const(block.get_by_position(arguments[1]).column);
+ auto x_col =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[0]).column);
+ auto y_col =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[1]).column);
- const auto size = std::max(left_column->size(), right_column->size());
+ const auto size = x_col.size();
auto res = ColumnString::create();
auto null_map = ColumnUInt8::create(size, 0);
auto& null_map_data = null_map->get_data();
- const auto* left_column_f64 = assert_cast<const
ColumnFloat64*>(left_column.get());
- const auto* right_column_f64 = assert_cast<const
ColumnFloat64*>(right_column.get());
GeoPoint point;
std::string buf;
- if (left_const) {
- const_vector(left_column_f64, right_column_f64, res,
null_map_data, size, point, buf);
- } else if (right_const) {
- vector_const(left_column_f64, right_column_f64, res,
null_map_data, size, point, buf);
- } else {
- vector_vector(left_column_f64, right_column_f64, res,
null_map_data, size, point, buf);
+ for (int row = 0; row < size; ++row) {
+ auto cur_res = point.from_coord(x_col.value_at(row),
y_col.value_at(row));
+ if (cur_res != GEO_PARSE_OK) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+ buf.clear();
+ point.encode_to(&buf);
+ res->insert_data(buf.data(), buf.size());
}
block.replace_by_position(result,
ColumnNullable::create(std::move(res),
std::move(null_map)));
return Status::OK();
}
-
- static void loop_do(GeoParseStatus& cur_res, ColumnString::MutablePtr&
res, NullMap& null_map,
- int row, GeoPoint& point, std::string& buf) {
- if (cur_res != GEO_PARSE_OK) {
- null_map[row] = 1;
- res->insert_default();
- return;
- }
-
- buf.clear();
- point.encode_to(&buf);
- res->insert_data(buf.data(), buf.size());
- }
-
- static void const_vector(const ColumnFloat64* left_column, const
ColumnFloat64* right_column,
- ColumnString::MutablePtr& res, NullMap& null_map,
const size_t size,
- GeoPoint& point, std::string& buf) {
- double x = left_column->get_element(0);
- for (int row = 0; row < size; ++row) {
- auto cur_res = point.from_coord(x, right_column->get_element(row));
- loop_do(cur_res, res, null_map, row, point, buf);
- }
- }
-
- static void vector_const(const ColumnFloat64* left_column, const
ColumnFloat64* right_column,
- ColumnString::MutablePtr& res, NullMap& null_map,
const size_t size,
- GeoPoint& point, std::string& buf) {
- double y = right_column->get_element(0);
- for (int row = 0; row < size; ++row) {
- auto cur_res = point.from_coord(left_column->get_element(row), y);
- loop_do(cur_res, res, null_map, row, point, buf);
- }
- }
-
- static void vector_vector(const ColumnFloat64* left_column, const
ColumnFloat64* right_column,
- ColumnString::MutablePtr& res, NullMap&
null_map, const size_t size,
- GeoPoint& point, std::string& buf) {
- for (int row = 0; row < size; ++row) {
- auto cur_res =
- point.from_coord(left_column->get_element(row),
right_column->get_element(row));
- loop_do(cur_res, res, null_map, row, point, buf);
- }
- }
};
struct StAsTextName {
@@ -242,31 +201,20 @@ struct StDistanceSphere {
DCHECK_EQ(arguments.size(), 4);
auto return_type = block.get_data_type(result);
- ColumnPtr x_lng_origin =
-
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- ColumnPtr x_lat_origin =
-
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
- ColumnPtr y_lng_origin =
-
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
- ColumnPtr y_lat_origin =
-
block.get_by_position(arguments[3]).column->convert_to_full_column_if_const();
-
- const auto* x_lng =
check_and_get_column<ColumnFloat64>(x_lng_origin.get());
- const auto* x_lat =
check_and_get_column<ColumnFloat64>(x_lat_origin.get());
- const auto* y_lng =
check_and_get_column<ColumnFloat64>(y_lng_origin.get());
- const auto* y_lat =
check_and_get_column<ColumnFloat64>(y_lat_origin.get());
- CHECK(x_lng && x_lat && y_lng && y_lat);
-
- const auto size = x_lng->size();
+ auto x_lng =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[0]).column);
+ auto x_lat =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[1]).column);
+ auto y_lng =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[2]).column);
+ auto y_lat =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[3]).column);
+
+ const auto size = x_lng.size();
auto res = ColumnFloat64::create();
res->reserve(size);
auto null_map = ColumnUInt8::create(size, 0);
auto& null_map_data = null_map->get_data();
for (int row = 0; row < size; ++row) {
double distance = 0;
- if (!GeoPoint::ComputeDistance(x_lng->get_element(row),
x_lat->get_element(row),
- y_lng->get_element(row),
y_lat->get_element(row),
- &distance)) {
+ if (!GeoPoint::ComputeDistance(x_lng.value_at(row),
x_lat.value_at(row),
+ y_lng.value_at(row),
y_lat.value_at(row), &distance)) {
null_map_data[row] = 1;
res->insert_default();
continue;
@@ -288,22 +236,12 @@ struct StAngleSphere {
DCHECK_EQ(arguments.size(), 4);
auto return_type = block.get_data_type(result);
- ColumnPtr x_lng_origin =
-
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- ColumnPtr x_lat_origin =
-
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
- ColumnPtr y_lng_origin =
-
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
- ColumnPtr y_lat_origin =
-
block.get_by_position(arguments[3]).column->convert_to_full_column_if_const();
-
- const auto* x_lng =
check_and_get_column<ColumnFloat64>(x_lng_origin.get());
- const auto* x_lat =
check_and_get_column<ColumnFloat64>(x_lat_origin.get());
- const auto* y_lng =
check_and_get_column<ColumnFloat64>(y_lng_origin.get());
- const auto* y_lat =
check_and_get_column<ColumnFloat64>(y_lat_origin.get());
- CHECK(x_lng && x_lat && y_lng && y_lat);
+ auto x_lng =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[0]).column);
+ auto x_lat =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[1]).column);
+ auto y_lng =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[2]).column);
+ auto y_lat =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[3]).column);
- const auto size = x_lng->size();
+ const auto size = x_lng.size();
auto res = ColumnFloat64::create();
res->reserve(size);
@@ -312,9 +250,8 @@ struct StAngleSphere {
for (int row = 0; row < size; ++row) {
double angle = 0;
- if (!GeoPoint::ComputeAngleSphere(x_lng->get_element(row),
x_lat->get_element(row),
- y_lng->get_element(row),
y_lat->get_element(row),
- &angle)) {
+ if (!GeoPoint::ComputeAngleSphere(x_lng.value_at(row),
x_lat.value_at(row),
+ y_lng.value_at(row),
y_lat.value_at(row), &angle)) {
null_map_data[row] = 1;
res->insert_default();
continue;
@@ -336,10 +273,10 @@ struct StAngle {
DCHECK_EQ(arguments.size(), 3);
auto return_type = block.get_data_type(result);
- auto p1 =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- auto p2 =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
- auto p3 =
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
- const auto size = p1->size();
+ auto p1 =
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[0]).column);
+ auto p2 =
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[1]).column);
+ auto p3 =
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[2]).column);
+ const auto size = p1.size();
auto res = ColumnFloat64::create();
res->reserve(size);
auto null_map = ColumnUInt8::create(size, 0);
@@ -350,7 +287,7 @@ struct StAngle {
GeoPoint point3;
for (int row = 0; row < size; ++row) {
- auto shape_value1 = p1->get_data_at(row);
+ auto shape_value1 = p1.value_at(row);
auto pt1 = point1.decode_from(shape_value1.data,
shape_value1.size);
if (!pt1) {
null_map_data[row] = 1;
@@ -358,14 +295,14 @@ struct StAngle {
continue;
}
- auto shape_value2 = p2->get_data_at(row);
+ auto shape_value2 = p2.value_at(row);
auto pt2 = point2.decode_from(shape_value2.data,
shape_value2.size);
if (!pt2) {
null_map_data[row] = 1;
res->insert_default();
continue;
}
- auto shape_value3 = p3->get_data_at(row);
+ auto shape_value3 = p3.value_at(row);
auto pt3 = point3.decode_from(shape_value3.data,
shape_value3.size);
if (!pt3) {
null_map_data[row] = 1;
@@ -395,84 +332,40 @@ struct StAzimuth {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
- const auto& [left_column, left_const] =
- unpack_if_const(block.get_by_position(arguments[0]).column);
- const auto& [right_column, right_const] =
- unpack_if_const(block.get_by_position(arguments[1]).column);
+ auto left_col =
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[0]).column);
+ auto right_col =
+
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[1]).column);
- const auto size = std::max(left_column->size(), right_column->size());
+ const auto size = left_col.size();
auto res = ColumnFloat64::create();
res->reserve(size);
auto null_map = ColumnUInt8::create(size, 0);
auto& null_map_data = null_map->get_data();
GeoPoint point1;
GeoPoint point2;
- if (left_const) {
- const_vector(left_column, right_column, res, null_map_data, size,
point1, point2);
- } else if (right_const) {
- vector_const(left_column, right_column, res, null_map_data, size,
point1, point2);
- } else {
- vector_vector(left_column, right_column, res, null_map_data, size,
point1, point2);
- }
- block.replace_by_position(result,
- ColumnNullable::create(std::move(res),
std::move(null_map)));
- return Status::OK();
- }
-
- static void loop_do(bool& pt1, bool& pt2, GeoPoint& point1, GeoPoint&
point2,
- ColumnFloat64::MutablePtr& res, NullMap& null_map, int
row) {
- if (!(pt1 && pt2)) {
- null_map[row] = 1;
- res->insert_default();
- return;
- }
-
- double angle = 0;
- if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
- null_map[row] = 1;
- res->insert_default();
- return;
- }
- res->insert_value(angle);
- }
-
- static void const_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
- ColumnFloat64::MutablePtr& res, NullMap&
null_map, size_t size,
- GeoPoint& point1, GeoPoint& point2) {
- auto shape_value1 = left_column->get_data_at(0);
- auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
for (int row = 0; row < size; ++row) {
- auto shape_value2 = right_column->get_data_at(row);
- auto pt2 = point2.decode_from(shape_value2.data,
shape_value2.size);
-
- loop_do(pt1, pt2, point1, point2, res, null_map, row);
- }
- }
-
- static void vector_const(const ColumnPtr& left_column, const ColumnPtr&
right_column,
- ColumnFloat64::MutablePtr& res, NullMap&
null_map, size_t size,
- GeoPoint& point1, GeoPoint& point2) {
- auto shape_value2 = right_column->get_data_at(0);
- auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
- for (int row = 0; row < size; ++row) {
- auto shape_value1 = left_column->get_data_at(row);
+ auto shape_value1 = left_col.value_at(row);
auto pt1 = point1.decode_from(shape_value1.data,
shape_value1.size);
-
- loop_do(pt1, pt2, point1, point2, res, null_map, row);
- }
- }
-
- static void vector_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
- ColumnFloat64::MutablePtr& res, NullMap&
null_map, size_t size,
- GeoPoint& point1, GeoPoint& point2) {
- for (int row = 0; row < size; ++row) {
- auto shape_value1 = left_column->get_data_at(row);
- auto pt1 = point1.decode_from(shape_value1.data,
shape_value1.size);
- auto shape_value2 = right_column->get_data_at(row);
+ auto shape_value2 = right_col.value_at(row);
auto pt2 = point2.decode_from(shape_value2.data,
shape_value2.size);
- loop_do(pt1, pt2, point1, point2, res, null_map, row);
+ if (!(pt1 && pt2)) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+
+ double angle = 0;
+ if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+ res->insert_value(angle);
}
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
}
};
@@ -566,15 +459,12 @@ struct StCircle {
DCHECK_EQ(arguments.size(), 3);
auto return_type = block.get_data_type(result);
auto center_lng =
-
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- const auto* center_lng_ptr = assert_cast<const
ColumnFloat64*>(center_lng.get());
+
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[0]).column);
auto center_lat =
-
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
- const auto* center_lat_ptr = assert_cast<const
ColumnFloat64*>(center_lat.get());
- auto radius =
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
- const auto* radius_ptr = assert_cast<const
ColumnFloat64*>(radius.get());
+
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[1]).column);
+ auto radius =
ColumnView<TYPE_DOUBLE>::create(block.get_by_position(arguments[2]).column);
- const auto size = center_lng->size();
+ const auto size = center_lng.size();
auto res = ColumnString::create();
@@ -584,9 +474,9 @@ struct StCircle {
GeoCircle circle;
std::string buf;
for (int row = 0; row < size; ++row) {
- auto lng_value = center_lng_ptr->get_element(row);
- auto lat_value = center_lat_ptr->get_element(row);
- auto radius_value = radius_ptr->get_element(row);
+ auto lng_value = center_lng.value_at(row);
+ auto lat_value = center_lat.value_at(row);
+ auto radius_value = radius.value_at(row);
auto value = circle.init(lng_value, lat_value, radius_value);
if (value != GEO_PARSE_OK) {
@@ -613,75 +503,35 @@ struct StRelationFunction {
static Status execute(Block& block, const ColumnNumbers& arguments, size_t
result) {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
- const auto& [left_column, left_const] =
- unpack_if_const(block.get_by_position(arguments[0]).column);
- const auto& [right_column, right_const] =
- unpack_if_const(block.get_by_position(arguments[1]).column);
+ auto left_col =
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[0]).column);
+ auto right_col =
+
ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[1]).column);
- const auto size = std::max(left_column->size(), right_column->size());
+ const auto size = left_col.size();
auto res = ColumnUInt8::create(size, 0);
auto null_map = ColumnUInt8::create(size, 0);
auto& null_map_data = null_map->get_data();
- if (left_const) {
- const_vector(left_column, right_column, res, null_map_data, size);
- } else if (right_const) {
- vector_const(left_column, right_column, res, null_map_data, size);
- } else {
- vector_vector(left_column, right_column, res, null_map_data, size);
- }
- block.replace_by_position(result,
- ColumnNullable::create(std::move(res),
std::move(null_map)));
- return Status::OK();
- }
-
- static void loop_do(StringRef& lhs_value, StringRef& rhs_value,
- std::vector<std::unique_ptr<GeoShape>>& shapes,
- ColumnUInt8::MutablePtr& res, NullMap& null_map, int
row) {
- StringRef* strs[2] = {&lhs_value, &rhs_value};
- for (int i = 0; i < 2; ++i) {
- std::unique_ptr<GeoShape>
shape(GeoShape::from_encoded(strs[i]->data, strs[i]->size));
- shapes[i] = std::move(shape);
- if (!shapes[i]) {
- null_map[row] = 1;
- break;
- }
- }
- if (shapes[0] && shapes[1]) {
- auto relation_value = Func::evaluate(shapes[0].get(),
shapes[1].get());
- res->get_data()[row] = relation_value;
- }
- }
-
- static void const_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
- ColumnUInt8::MutablePtr& res, NullMap& null_map,
const size_t size) {
- auto lhs_value = left_column->get_data_at(0);
- std::vector<std::unique_ptr<GeoShape>> shapes(2);
for (int row = 0; row < size; ++row) {
- auto rhs_value = right_column->get_data_at(row);
- loop_do(lhs_value, rhs_value, shapes, res, null_map, row);
- }
- }
+ auto lhs_value = left_col.value_at(row);
+ auto rhs_value = right_col.value_at(row);
- static void vector_const(const ColumnPtr& left_column, const ColumnPtr&
right_column,
- ColumnUInt8::MutablePtr& res, NullMap& null_map,
const size_t size) {
- auto rhs_value = right_column->get_data_at(0);
- std::vector<std::unique_ptr<GeoShape>> shapes(2);
- for (int row = 0; row < size; ++row) {
- auto lhs_value = left_column->get_data_at(row);
- loop_do(lhs_value, rhs_value, shapes, res, null_map, row);
- }
- }
+ std::unique_ptr<GeoShape> shape1(
+ GeoShape::from_encoded(lhs_value.data, lhs_value.size));
+ std::unique_ptr<GeoShape> shape2(
+ GeoShape::from_encoded(rhs_value.data, rhs_value.size));
- static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
- ColumnUInt8::MutablePtr& res, NullMap& null_map, const size_t size) {
- std::vector<std::unique_ptr<GeoShape>> shapes(2);
- for (int row = 0; row < size; ++row) {
- auto lhs_value = left_column->get_data_at(row);
- auto rhs_value = right_column->get_data_at(row);
- loop_do(lhs_value, rhs_value, shapes, res, null_map, row);
+ if (!shape1 || !shape2) {
+ null_map_data[row] = 1;
+ continue;
+ }
+ auto relation_value = Func::evaluate(shape1.get(), shape2.get());
+ res->get_data()[row] = relation_value;
}
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res), std::move(null_map)));
+ return Status::OK();
}
};
diff --git a/be/test/vec/columns/column_execute_util_test.cpp b/be/test/vec/columns/column_execute_util_test.cpp
new file mode 100644
index 00000000000..1103c212378
--- /dev/null
+++ b/be/test/vec/columns/column_execute_util_test.cpp
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_execute_util.h"
+
+#include <gtest/gtest.h>
+
+#include "runtime/primitive_type.h"
+#include "testutil/column_helper.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_string.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+// ==================== ColumnElementView Tests ====================
+
+TEST(ColumnExecuteUtilTest, ColumnElementView_int32) {
+ auto col = ColumnHelper::create_column<DataTypeInt32>({10, 20, 30});
+ ColumnElementView<TYPE_INT> view(*col);
+
+ EXPECT_EQ(view.get_element(0), 10);
+ EXPECT_EQ(view.get_element(1), 20);
+ EXPECT_EQ(view.get_element(2), 30);
+}
+
+TEST(ColumnExecuteUtilTest, ColumnElementView_int64) {
+ auto col = ColumnHelper::create_column<DataTypeInt64>({100, 200, 300});
+ ColumnElementView<TYPE_BIGINT> view(*col);
+
+ EXPECT_EQ(view.get_element(0), 100);
+ EXPECT_EQ(view.get_element(1), 200);
+ EXPECT_EQ(view.get_element(2), 300);
+}
+
+TEST(ColumnExecuteUtilTest, ColumnElementView_double) {
+ auto col = ColumnHelper::create_column<DataTypeFloat64>({1.5, 2.5, 3.5});
+ ColumnElementView<TYPE_DOUBLE> view(*col);
+
+ EXPECT_DOUBLE_EQ(view.get_element(0), 1.5);
+ EXPECT_DOUBLE_EQ(view.get_element(1), 2.5);
+ EXPECT_DOUBLE_EQ(view.get_element(2), 3.5);
+}
+
+TEST(ColumnExecuteUtilTest, ColumnElementView_string) {
+ auto col = ColumnHelper::create_column<DataTypeString>({"hello", "world", "test"});
+ ColumnElementView<TYPE_STRING> view(*col);
+
+ EXPECT_EQ(view.get_element(0).to_string(), "hello");
+ EXPECT_EQ(view.get_element(1).to_string(), "world");
+ EXPECT_EQ(view.get_element(2).to_string(), "test");
+}
+
+// ==================== ColumnView Tests ====================
+
+TEST(ColumnExecuteUtilTest, ColumnView_non_nullable_non_const) {
+ auto col = ColumnHelper::create_column<DataTypeInt32>({1, 2, 3});
+ auto view = ColumnView<TYPE_INT>::create(col);
+
+ EXPECT_EQ(view.count, 3);
+ EXPECT_FALSE(view.is_const);
+ EXPECT_EQ(view.null_map, nullptr);
+
+ EXPECT_FALSE(view.is_null_at(0));
+ EXPECT_FALSE(view.is_null_at(1));
+ EXPECT_FALSE(view.is_null_at(2));
+
+ EXPECT_EQ(view.value_at(0), 1);
+ EXPECT_EQ(view.value_at(1), 2);
+ EXPECT_EQ(view.value_at(2), 3);
+}
+
+TEST(ColumnExecuteUtilTest, ColumnView_nullable) {
+ auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({10, 20, 30}, {0, 1, 0});
+ auto view = ColumnView<TYPE_INT>::create(col);
+
+ EXPECT_EQ(view.count, 3);
+ EXPECT_FALSE(view.is_const);
+ EXPECT_NE(view.null_map, nullptr);
+
+ EXPECT_FALSE(view.is_null_at(0));
+ EXPECT_TRUE(view.is_null_at(1));
+ EXPECT_FALSE(view.is_null_at(2));
+
+ EXPECT_EQ(view.value_at(0), 10);
+ EXPECT_EQ(view.value_at(2), 30);
+}
+
+TEST(ColumnExecuteUtilTest, ColumnView_const_column) {
+ auto col = ColumnHelper::create_column<DataTypeInt32>({42});
+ ColumnPtr const_col = ColumnConst::create(col, 5);
+ auto view = ColumnView<TYPE_INT>::create(const_col);
+
+ EXPECT_EQ(view.count, 5);
+ EXPECT_TRUE(view.is_const);
+ EXPECT_EQ(view.null_map, nullptr);
+
+ // All positions should return the same value
+ for (size_t i = 0; i < 5; ++i) {
+ EXPECT_FALSE(view.is_null_at(i));
+ EXPECT_EQ(view.value_at(i), 42);
+ }
+}
+
+TEST(ColumnExecuteUtilTest, ColumnView_const_nullable_column) {
+ auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({100}, {0});
+ ColumnPtr const_col = ColumnConst::create(col, 3);
+ auto view = ColumnView<TYPE_INT>::create(const_col);
+
+ EXPECT_EQ(view.count, 3);
+ EXPECT_TRUE(view.is_const);
+ EXPECT_NE(view.null_map, nullptr);
+
+ for (size_t i = 0; i < 3; ++i) {
+ EXPECT_FALSE(view.is_null_at(i));
+ EXPECT_EQ(view.value_at(i), 100);
+ }
+}
+
+TEST(ColumnExecuteUtilTest, ColumnView_const_nullable_null_column) {
+ auto col = ColumnHelper::create_nullable_column<DataTypeInt32>({0}, {1});
+ ColumnPtr const_col = ColumnConst::create(col, 3);
+ auto view = ColumnView<TYPE_INT>::create(const_col);
+
+ EXPECT_EQ(view.count, 3);
+ EXPECT_TRUE(view.is_const);
+ EXPECT_NE(view.null_map, nullptr);
+
+ for (size_t i = 0; i < 3; ++i) {
+ EXPECT_TRUE(view.is_null_at(i));
+ }
+}
+
+TEST(ColumnExecuteUtilTest, ColumnView_string) {
+ auto col = ColumnHelper::create_column<DataTypeString>({"a", "bb", "ccc"});
+ auto view = ColumnView<TYPE_STRING>::create(col);
+
+ EXPECT_EQ(view.count, 3);
+ EXPECT_FALSE(view.is_const);
+ EXPECT_EQ(view.null_map, nullptr);
+
+ EXPECT_EQ(view.value_at(0).to_string(), "a");
+ EXPECT_EQ(view.value_at(1).to_string(), "bb");
+ EXPECT_EQ(view.value_at(2).to_string(), "ccc");
+}
+
+} // namespace doris::vectorized
diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
index e8c4fea5137..d604e6acb20 100644
--- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
+++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
@@ -1972,3 +1972,13 @@
-- !const_select --
["2022-05-18 12:00:00.123", "2022-05-18 12:05:00.123", "2022-05-18 12:10:00.123", "2022-05-18 12:15:00.123"]
+-- !array_join_1 --
+1 a0000b0000c
+2 c11111d11111e
+3 f222222g
+
+-- !array_join_2 --
+1 a0000b0000c
+2 c11111d11111e
+3 f222222???222222g
+
diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index 92d9e0d95f2..d960abb4929 100644
--- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -435,4 +435,40 @@ suite("test_array_functions") {
qt_const_select "select sequence(1, 10, 0); "
qt_const_select "select sequence(cast('2022-05-15 12:00:00' as datetimev2(0)), cast('2022-05-17 12:00:00' as datetimev2(0)), interval 0 day); "
qt_const_select """select sequence("2022-05-18T12:00:00.123", "2022-05-18T12:16:00.123", interval 5 minute); """
+
+
+
+ sql"""
+ DROP TABLE IF EXISTS db_test_array_join;
+ """
+
+
+ sql"""
+ CREATE TABLE IF NOT EXISTS db_test_array_join (
+ `id` INT(11) NUll COMMENT "",
+ `sarr` array<String> NUll COMMENT "",
+ `s1` String NUll COMMENT "",
+ `s2` String NUll COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+);
+
+ """
+
+ sql"""
+insert into db_test_array_join values(1,["a","b","c"],"0000","?") , (2,["c","d","e"] , "11111" , "??") , (3,["f",null,"g"] ,"222222", "???");
+ """
+
+ qt_array_join_1 """
+ select id, array_join(sarr,s1) from db_test_array_join order by id;
+ """
+
+ qt_array_join_2 """
+ select id , array_join(sarr,s1,s2) from db_test_array_join order by id;
+ """
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]