This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new abf753c1ca7 [enhancement]Optimize GeoFunctions for const columns
(#34396)
abf753c1ca7 is described below
commit abf753c1ca7e654a63fb12f7bc9d148fc52b726a
Author: koarz <[email protected]>
AuthorDate: Thu May 16 14:37:47 2024 +0800
[enhancement]Optimize GeoFunctions for const columns (#34396)
---
be/src/vec/functions/functions_geo.cpp | 236 ++++++++++++++++++++++-------
be/test/vec/function/function_geo_test.cpp | 151 ++++++++++++------
2 files changed, 280 insertions(+), 107 deletions(-)
diff --git a/be/src/vec/functions/functions_geo.cpp
b/be/src/vec/functions/functions_geo.cpp
index ac6969c582d..172f000928f 100644
--- a/be/src/vec/functions/functions_geo.cpp
+++ b/be/src/vec/functions/functions_geo.cpp
@@ -44,33 +44,70 @@ struct StPoint {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
- auto column_x =
-
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- auto column_y =
-
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+ const auto& [left_column, left_const] =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ const auto& [right_column, right_const] =
+ unpack_if_const(block.get_by_position(arguments[1]).column);
- const auto size = column_x->size();
+ const auto size = std::max(left_column->size(), right_column->size());
MutableColumnPtr res = return_type->create_column();
GeoPoint point;
std::string buf;
- for (int row = 0; row < size; ++row) {
- auto cur_res =
point.from_coord(column_x->operator[](row).get<Float64>(),
-
column_y->operator[](row).get<Float64>());
- if (cur_res != GEO_PARSE_OK) {
- res->insert_data(nullptr, 0);
- continue;
- }
-
- buf.clear();
- point.encode_to(&buf);
- res->insert_data(buf.data(), buf.size());
+ if (left_const) {
+ const_vector(left_column, right_column, res, size, point, buf);
+ } else if (right_const) {
+ vector_const(left_column, right_column, res, size, point, buf);
+ } else {
+ vector_vector(left_column, right_column, res, size, point, buf);
}
block.replace_by_position(result, std::move(res));
return Status::OK();
}
+
+ static void loop_do(GeoParseStatus& cur_res, MutableColumnPtr& res,
GeoPoint& point,
+ std::string& buf) {
+ if (cur_res != GEO_PARSE_OK) {
+ res->insert_data(nullptr, 0);
+ return;
+ }
+
+ buf.clear();
+ point.encode_to(&buf);
+ res->insert_data(buf.data(), buf.size());
+ }
+
+ static void const_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, const size_t size,
GeoPoint& point,
+ std::string& buf) {
+ double x = left_column->operator[](0).get<Float64>();
+ for (int row = 0; row < size; ++row) {
+ auto cur_res = point.from_coord(x,
right_column->operator[](row).get<Float64>());
+ loop_do(cur_res, res, point, buf);
+ }
+ }
+
+ static void vector_const(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, const size_t size,
GeoPoint& point,
+ std::string& buf) {
+ double y = right_column->operator[](0).get<Float64>();
+ for (int row = 0; row < size; ++row) {
+ auto cur_res =
point.from_coord(left_column->operator[](row).get<Float64>(), y);
+ loop_do(cur_res, res, point, buf);
+ }
+ }
+
+ static void vector_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, const size_t size,
GeoPoint& point,
+ std::string& buf) {
+ for (int row = 0; row < size; ++row) {
+ auto cur_res =
point.from_coord(left_column->operator[](row).get<Float64>(),
+
right_column->operator[](row).get<Float64>());
+ loop_do(cur_res, res, point, buf);
+ }
+ }
};
struct StAsTextName {
@@ -304,37 +341,78 @@ struct StAzimuth {
auto return_type = block.get_data_type(result);
MutableColumnPtr res = return_type->create_column();
- auto p1 =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- auto p2 =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
- const auto size = p1->size();
+ const auto& [left_column, left_const] =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ const auto& [right_column, right_const] =
+ unpack_if_const(block.get_by_position(arguments[1]).column);
+
+ const auto size = std::max(left_column->size(), right_column->size());
GeoPoint point1;
GeoPoint point2;
+ if (left_const) {
+ const_vector(left_column, right_column, res, size, point1, point2);
+ } else if (right_const) {
+ vector_const(left_column, right_column, res, size, point1, point2);
+ } else {
+ vector_vector(left_column, right_column, res, size, point1,
point2);
+ }
+ block.replace_by_position(result, std::move(res));
+ return Status::OK();
+ }
+
+ static void loop_do(bool& pt1, bool& pt2, GeoPoint& point1, GeoPoint&
point2,
+ MutableColumnPtr& res) {
+ if (!(pt1 && pt2)) {
+ res->insert_default();
+ return;
+ }
+
+ double angle = 0;
+ if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
+ res->insert_default();
+ return;
+ }
+ res->insert_data(const_cast<const char*>((char*)&angle), 0);
+ }
+ static void const_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, size_t size, GeoPoint&
point1,
+ GeoPoint& point2) {
+ auto shape_value1 = left_column->get_data_at(0);
+ auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size);
for (int row = 0; row < size; ++row) {
- auto shape_value1 = p1->get_data_at(row);
+ auto shape_value2 = right_column->get_data_at(row);
+ auto pt2 = point2.decode_from(shape_value2.data,
shape_value2.size);
+
+ loop_do(pt1, pt2, point1, point2, res);
+ }
+ }
+
+ static void vector_const(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, size_t size, GeoPoint&
point1,
+ GeoPoint& point2) {
+ auto shape_value2 = right_column->get_data_at(0);
+ auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size);
+ for (int row = 0; row < size; ++row) {
+ auto shape_value1 = left_column->get_data_at(row);
auto pt1 = point1.decode_from(shape_value1.data,
shape_value1.size);
- if (!pt1) {
- res->insert_default();
- continue;
- }
- auto shape_value2 = p2->get_data_at(row);
+ loop_do(pt1, pt2, point1, point2, res);
+ }
+ }
+
+ static void vector_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, size_t size, GeoPoint&
point1,
+ GeoPoint& point2) {
+ for (int row = 0; row < size; ++row) {
+ auto shape_value1 = left_column->get_data_at(row);
+ auto pt1 = point1.decode_from(shape_value1.data,
shape_value1.size);
+ auto shape_value2 = right_column->get_data_at(row);
auto pt2 = point2.decode_from(shape_value2.data,
shape_value2.size);
- if (!pt2) {
- res->insert_default();
- continue;
- }
- double angle = 0;
- if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) {
- res->insert_default();
- continue;
- }
- res->insert_data(const_cast<const char*>((char*)&angle), 0);
+ loop_do(pt1, pt2, point1, point2, res);
}
- block.replace_by_position(result, std::move(res));
- return Status::OK();
}
};
@@ -463,34 +541,76 @@ struct StContains {
size_t result) {
DCHECK_EQ(arguments.size(), 2);
auto return_type = block.get_data_type(result);
- auto shape1 =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
- auto shape2 =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+ const auto& [left_column, left_const] =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ const auto& [right_column, right_const] =
+ unpack_if_const(block.get_by_position(arguments[1]).column);
+
+ const auto size = std::max(left_column->size(), right_column->size());
- const auto size = shape1->size();
MutableColumnPtr res = return_type->create_column();
+ if (left_const) {
+ const_vector(left_column, right_column, res, size);
+ } else if (right_const) {
+ vector_const(left_column, right_column, res, size);
+ } else {
+ vector_vector(left_column, right_column, res, size);
+ }
+ block.replace_by_position(result, std::move(res));
+ return Status::OK();
+ }
+
+ static void loop_do(StringRef& lhs_value, StringRef& rhs_value,
+ std::vector<std::shared_ptr<GeoShape>>& shapes, int& i,
+ MutableColumnPtr& res) {
+ StringRef* strs[2] = {&lhs_value, &rhs_value};
+ for (i = 0; i < 2; ++i) {
+ shapes[i] =
+
std::shared_ptr<GeoShape>(GeoShape::from_encoded(strs[i]->data, strs[i]->size));
+ if (shapes[i] == nullptr) {
+ res->insert_default();
+ break;
+ }
+ }
+
+ if (i == 2) {
+ auto contains_value = shapes[0]->contains(shapes[1].get());
+ res->insert_data(const_cast<const char*>((char*)&contains_value),
0);
+ }
+ }
+
+ static void const_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, const size_t size) {
int i;
+ auto lhs_value = left_column->get_data_at(0);
std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
for (int row = 0; row < size; ++row) {
- auto lhs_value = shape1->get_data_at(row);
- auto rhs_value = shape2->get_data_at(row);
- StringRef* strs[2] = {&lhs_value, &rhs_value};
- for (i = 0; i < 2; ++i) {
- shapes[i] = std::shared_ptr<GeoShape>(
- GeoShape::from_encoded(strs[i]->data, strs[i]->size));
- if (shapes[i] == nullptr) {
- res->insert_default();
- break;
- }
- }
+ auto rhs_value = right_column->get_data_at(row);
+ loop_do(lhs_value, rhs_value, shapes, i, res);
+ }
+ }
- if (i == 2) {
- auto contains_value = shapes[0]->contains(shapes[1].get());
- res->insert_data(const_cast<const
char*>((char*)&contains_value), 0);
- }
+ static void vector_const(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, const size_t size) {
+ int i;
+ auto rhs_value = right_column->get_data_at(0);
+ std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+ for (int row = 0; row < size; ++row) {
+ auto lhs_value = left_column->get_data_at(row);
+ loop_do(lhs_value, rhs_value, shapes, i, res);
+ }
+ }
+
+ static void vector_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ MutableColumnPtr& res, const size_t size) {
+ int i;
+ std::vector<std::shared_ptr<GeoShape>> shapes = {nullptr, nullptr};
+ for (int row = 0; row < size; ++row) {
+ auto lhs_value = left_column->get_data_at(row);
+ auto rhs_value = right_column->get_data_at(row);
+ loop_do(lhs_value, rhs_value, shapes, i, res);
}
- block.replace_by_position(result, std::move(res));
- return Status::OK();
}
static Status open(FunctionContext* context,
FunctionContext::FunctionStateScope scope) {
@@ -638,7 +758,7 @@ struct StAsBinary {
auto return_type = block.get_data_type(result);
MutableColumnPtr res = return_type->create_column();
- auto col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+ auto col = block.get_by_position(arguments[0]).column;
const auto size = col->size();
std::unique_ptr<GeoShape> shape;
diff --git a/be/test/vec/function/function_geo_test.cpp
b/be/test/vec/function/function_geo_test.cpp
index c4b699e8816..0860eae773b 100644
--- a/be/test/vec/function/function_geo_test.cpp
+++ b/be/test/vec/function/function_geo_test.cpp
@@ -40,20 +40,38 @@ using namespace ut_type;
TEST(VGeoFunctionsTest, function_geo_st_point_test) {
std::string func_name = "st_point";
+
+ GeoPoint point;
+ auto cur_res = point.from_coord(24.7, 56.7);
+ EXPECT_TRUE(cur_res == GEO_PARSE_OK);
+ std::string buf;
+ point.encode_to(&buf);
+
+ DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
+ {{Null(), (double)5}, Null()},
+ {{(double)5, Null()}, Null()}};
{
InputTypeSet input_types = {TypeIndex::Float64, TypeIndex::Float64};
- GeoPoint point;
- auto cur_res = point.from_coord(24.7, 56.7);
- EXPECT_TRUE(cur_res == GEO_PARSE_OK);
- std::string buf;
- point.encode_to(&buf);
+ static_cast<void>(check_function<DataTypeString, true>(func_name,
input_types, data_set));
+ }
+ {
+ InputTypeSet input_types = {Consted {TypeIndex::Float64},
TypeIndex::Float64};
- DataSet data_set = {{{(double)24.7, (double)56.7}, buf},
- {{Null(), (double)5}, Null()},
- {{(double)5, Null()}, Null()}};
+ for (const auto& line : data_set) {
+ DataSet const_dataset = {line};
+ static_cast<void>(
+ check_function<DataTypeString, true>(func_name,
input_types, const_dataset));
+ }
+ }
+ {
+ InputTypeSet input_types = {TypeIndex::Float64, Consted
{TypeIndex::Float64}};
- static_cast<void>(check_function<DataTypeString, true>(func_name,
input_types, data_set));
+ for (const auto& line : data_set) {
+ DataSet const_dataset = {line};
+ static_cast<void>(
+ check_function<DataTypeString, true>(func_name,
input_types, const_dataset));
+ }
}
}
@@ -199,61 +217,96 @@ TEST(VGeoFunctionsTest, function_geo_st_angle) {
TEST(VGeoFunctionsTest, function_geo_st_azimuth) {
std::string func_name = "st_azimuth";
+ GeoPoint point1;
+ auto cur_res1 = point1.from_coord(0, 0);
+ EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
+ GeoPoint point2;
+ auto cur_res2 = point2.from_coord(1, 0);
+ EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);
+
+ std::string buf1;
+ point1.encode_to(&buf1);
+ std::string buf2;
+ point2.encode_to(&buf2);
+
+ DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
+ {{buf1, Null()}, Null()},
+ {{Null(), buf2}, Null()}};
{
InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
- GeoPoint point1;
- auto cur_res1 = point1.from_coord(0, 0);
- EXPECT_TRUE(cur_res1 == GEO_PARSE_OK);
- GeoPoint point2;
- auto cur_res2 = point2.from_coord(1, 0);
- EXPECT_TRUE(cur_res2 == GEO_PARSE_OK);
-
- std::string buf1;
- point1.encode_to(&buf1);
- std::string buf2;
- point2.encode_to(&buf2);
+ static_cast<void>(check_function<DataTypeFloat64, true>(func_name,
input_types, data_set));
+ }
+ {
+ InputTypeSet input_types = {TypeIndex::String, Consted
{TypeIndex::String}};
- DataSet data_set = {{{buf1, buf2}, (double)1.5707963267948966},
- {{buf1, Null()}, Null()},
- {{Null(), buf2}, Null()}};
+ for (const auto& line : data_set) {
+ DataSet const_dataset = {line};
+ static_cast<void>(
+ check_function<DataTypeFloat64, true>(func_name,
input_types, const_dataset));
+ }
+ }
+ {
+ InputTypeSet input_types = {Consted {TypeIndex::String},
TypeIndex::String};
- static_cast<void>(check_function<DataTypeFloat64, true>(func_name,
input_types, data_set));
+ for (const auto& line : data_set) {
+ DataSet const_dataset = {line};
+ static_cast<void>(
+ check_function<DataTypeFloat64, true>(func_name,
input_types, const_dataset));
+ }
}
}
TEST(VGeoFunctionsTest, function_geo_st_contains) {
std::string func_name = "st_contains";
+
+ std::string buf1;
+ std::string buf2;
+ std::string buf3;
+ GeoParseStatus status;
+
+ std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0
0))");
+ std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(),
shape1.size(), &status));
+ EXPECT_TRUE(status == GEO_PARSE_OK);
+ EXPECT_TRUE(shape != nullptr);
+ shape->encode_to(&buf1);
+
+ GeoPoint point1;
+ status = point1.from_coord(5, 5);
+ EXPECT_TRUE(status == GEO_PARSE_OK);
+ point1.encode_to(&buf2);
+
+ GeoPoint point2;
+ status = point2.from_coord(50, 50);
+ EXPECT_TRUE(status == GEO_PARSE_OK);
+ point2.encode_to(&buf3);
+
+ DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
+ {{buf1, buf3}, (uint8_t)0},
+ {{buf1, Null()}, Null()},
+ {{Null(), buf3}, Null()}};
{
InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
- std::string buf1;
- std::string buf2;
- std::string buf3;
- GeoParseStatus status;
-
- std::string shape1 = std::string("POLYGON ((0 0, 10 0, 10 10, 0 10, 0
0))");
- std::unique_ptr<GeoShape> shape(GeoShape::from_wkt(shape1.data(),
shape1.size(), &status));
- EXPECT_TRUE(status == GEO_PARSE_OK);
- EXPECT_TRUE(shape != nullptr);
- shape->encode_to(&buf1);
-
- GeoPoint point1;
- status = point1.from_coord(5, 5);
- EXPECT_TRUE(status == GEO_PARSE_OK);
- point1.encode_to(&buf2);
-
- GeoPoint point2;
- status = point2.from_coord(50, 50);
- EXPECT_TRUE(status == GEO_PARSE_OK);
- point2.encode_to(&buf3);
+ static_cast<void>(check_function<DataTypeUInt8, true>(func_name,
input_types, data_set));
+ }
+ {
+ InputTypeSet input_types = {Consted {TypeIndex::String},
TypeIndex::String};
- DataSet data_set = {{{buf1, buf2}, (uint8_t)1},
- {{buf1, buf3}, (uint8_t)0},
- {{buf1, Null()}, Null()},
- {{Null(), buf3}, Null()}};
+ for (const auto& line : data_set) {
+ DataSet const_dataset = {line};
+ static_cast<void>(
+ check_function<DataTypeUInt8, true>(func_name,
input_types, const_dataset));
+ }
+ }
+ {
+ InputTypeSet input_types = {TypeIndex::String, Consted
{TypeIndex::String}};
- static_cast<void>(check_function<DataTypeUInt8, true>(func_name,
input_types, data_set));
+ for (const auto& line : data_set) {
+ DataSet const_dataset = {line};
+ static_cast<void>(
+ check_function<DataTypeUInt8, true>(func_name,
input_types, const_dataset));
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]