zclllyybb commented on code in PR #60170:
URL: https://github.com/apache/doris/pull/60170#discussion_r2725631468
##########
be/src/vec/functions/functions_geo.cpp:
##########
@@ -858,6 +858,164 @@ struct StAsBinary {
}
};
+struct StLength {
+ static constexpr auto NAME = "st_length";
+ static const size_t NUM_ARGS = 1;
+ using Type = DataTypeFloat64;
+ static Status execute(Block& block, const ColumnNumbers& arguments, size_t
result) {
+ DCHECK_EQ(arguments.size(), 1);
+ auto return_type = block.get_data_type(result);
+
+ auto col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+ const auto size = col->size();
+ auto res = ColumnFloat64::create();
+ res->reserve(size);
+ auto null_map = ColumnUInt8::create(size, 0);
+ auto& null_map_data = null_map->get_data();
+
+ std::unique_ptr<GeoShape> shape;
+ for (int row = 0; row < size; ++row) {
+ auto shape_value = col->get_data_at(row);
+ shape = GeoShape::from_encoded(shape_value.data, shape_value.size);
+ if (!shape) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+
+ double length = shape->Length();
+ res->insert_value(length);
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+};
+
+struct StGeometryType {
+ static constexpr auto NAME = "st_geometrytype";
+ static const size_t NUM_ARGS = 1;
+ using Type = DataTypeString;
+ static Status execute(Block& block, const ColumnNumbers& arguments, size_t
result) {
+ DCHECK_EQ(arguments.size(), 1);
+ auto return_type = block.get_data_type(result);
+
+ auto col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+ const auto size = col->size();
+ auto res = ColumnString::create();
+ auto null_map = ColumnUInt8::create(size, 0);
+ auto& null_map_data = null_map->get_data();
+
+ std::unique_ptr<GeoShape> shape;
+ for (int row = 0; row < size; ++row) {
+ auto shape_value = col->get_data_at(row);
+ shape = GeoShape::from_encoded(shape_value.data, shape_value.size);
+ if (!shape) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+
+ auto geo_type = shape->GeometryType();
+ res->insert_data(geo_type.data(), geo_type.size());
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+};
+
+template <typename Func>
+struct StDualGeoDoubleFunction {
+ static constexpr auto NAME = Func::NAME;
+ static const size_t NUM_ARGS = 2;
+ using Type = DataTypeFloat64;
+
+ static Status execute(Block& block, const ColumnNumbers& arguments, size_t
result) {
+ DCHECK_EQ(arguments.size(), 2);
+ auto return_type = block.get_data_type(result);
+ const auto& [left_column, left_const] =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ const auto& [right_column, right_const] =
+ unpack_if_const(block.get_by_position(arguments[1]).column);
+
+ const auto size = std::max(left_column->size(), right_column->size());
+
+ auto res = ColumnFloat64::create();
+ res->reserve(size);
+ auto null_map = ColumnUInt8::create(size, 0);
+ auto& null_map_data = null_map->get_data();
+
+ if (left_const) {
+ const_vector(left_column, right_column, res, null_map_data, size);
+ } else if (right_const) {
+ vector_const(left_column, right_column, res, null_map_data, size);
+ } else {
+ vector_vector(left_column, right_column, res, null_map_data, size);
+ }
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+
+ static void loop_do(StringRef& lhs_value, StringRef& rhs_value,
+ std::vector<std::unique_ptr<GeoShape>>& shapes,
+ ColumnFloat64::MutablePtr& res, NullMap& null_map, int
row) {
+ StringRef* strs[2] = {&lhs_value, &rhs_value};
+ for (int i = 0; i < 2; ++i) {
+ std::unique_ptr<GeoShape>
shape(GeoShape::from_encoded(strs[i]->data, strs[i]->size));
+ shapes[i] = std::move(shape);
+ if (!shapes[i]) {
+ null_map[row] = 1;
+ res->insert_default();
+ return;
+ }
+ }
+ if (shapes[0] && shapes[1]) {
+ double distance = Func::compute(shapes[0].get(), shapes[1].get());
+ res->insert_value(distance);
+ }
+ }
+
+ static void const_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ ColumnFloat64::MutablePtr& res, NullMap&
null_map, const size_t size) {
+ auto lhs_value = left_column->get_data_at(0);
+ std::vector<std::unique_ptr<GeoShape>> shapes(2);
+ for (int row = 0; row < size; ++row) {
+ auto rhs_value = right_column->get_data_at(row);
+ loop_do(lhs_value, rhs_value, shapes, res, null_map, row);
+ }
+ }
+
+ static void vector_const(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ ColumnFloat64::MutablePtr& res, NullMap&
null_map, const size_t size) {
+ auto rhs_value = right_column->get_data_at(0);
+ std::vector<std::unique_ptr<GeoShape>> shapes(2);
+ for (int row = 0; row < size; ++row) {
+ auto lhs_value = left_column->get_data_at(row);
+ loop_do(lhs_value, rhs_value, shapes, res, null_map, row);
+ }
+ }
+
+ static void vector_vector(const ColumnPtr& left_column, const ColumnPtr&
right_column,
+ ColumnFloat64::MutablePtr& res, NullMap&
null_map,
+ const size_t size) {
+ std::vector<std::unique_ptr<GeoShape>> shapes(2);
+ for (int row = 0; row < size; ++row) {
+ auto lhs_value = left_column->get_data_at(row);
Review Comment:
All `get_data_at` calls here may be virtual function calls. You can `assert_cast` the column
ptrs and pass them with concrete types.
##########
be/src/vec/functions/functions_geo.cpp:
##########
@@ -858,6 +858,164 @@ struct StAsBinary {
}
};
+struct StLength {
+ static constexpr auto NAME = "st_length";
+ static const size_t NUM_ARGS = 1;
+ using Type = DataTypeFloat64;
+ static Status execute(Block& block, const ColumnNumbers& arguments, size_t
result) {
+ DCHECK_EQ(arguments.size(), 1);
+ auto return_type = block.get_data_type(result);
+
+ auto col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+ const auto size = col->size();
+ auto res = ColumnFloat64::create();
+ res->reserve(size);
+ auto null_map = ColumnUInt8::create(size, 0);
+ auto& null_map_data = null_map->get_data();
+
+ std::unique_ptr<GeoShape> shape;
+ for (int row = 0; row < size; ++row) {
+ auto shape_value = col->get_data_at(row);
+ shape = GeoShape::from_encoded(shape_value.data, shape_value.size);
+ if (!shape) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+
+ double length = shape->Length();
+ res->insert_value(length);
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+};
+
+struct StGeometryType {
+ static constexpr auto NAME = "st_geometrytype";
+ static const size_t NUM_ARGS = 1;
+ using Type = DataTypeString;
+ static Status execute(Block& block, const ColumnNumbers& arguments, size_t
result) {
+ DCHECK_EQ(arguments.size(), 1);
+ auto return_type = block.get_data_type(result);
+
+ auto col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+ const auto size = col->size();
+ auto res = ColumnString::create();
+ auto null_map = ColumnUInt8::create(size, 0);
+ auto& null_map_data = null_map->get_data();
+
+ std::unique_ptr<GeoShape> shape;
+ for (int row = 0; row < size; ++row) {
+ auto shape_value = col->get_data_at(row);
+ shape = GeoShape::from_encoded(shape_value.data, shape_value.size);
+ if (!shape) {
+ null_map_data[row] = 1;
+ res->insert_default();
+ continue;
+ }
+
+ auto geo_type = shape->GeometryType();
+ res->insert_data(geo_type.data(), geo_type.size());
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+};
+
+template <typename Func>
+struct StDualGeoDoubleFunction {
Review Comment:
Could other existing functions use this template?
##########
be/src/geo/st_distance.cpp:
##########
@@ -0,0 +1,320 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <s2/s2cap.h>
+#include <s2/s2earth.h>
+#include <s2/s2loop.h>
+#include <s2/s2polygon.h>
+#include <s2/s2polyline.h>
+
+#include <cmath>
+#include <limits>
+
+#include "geo/geo_types.h"
+
+namespace doris {
+
+// Helper function to compute distance from a point to a line segment
+double distance_point_to_segment(const S2Point& point, const S2Point&
line_start,
+ const S2Point& line_end) {
+ S2LatLng point_ll = S2LatLng(point);
+ S2LatLng start_ll = S2LatLng(line_start);
+ S2LatLng end_ll = S2LatLng(line_end);
+
+ double px = point_ll.lng().degrees();
+ double py = point_ll.lat().degrees();
+ double x1 = start_ll.lng().degrees();
+ double y1 = start_ll.lat().degrees();
+ double x2 = end_ll.lng().degrees();
+ double y2 = end_ll.lat().degrees();
+
+ double dx = x2 - x1;
+ double dy = y2 - y1;
+
+ if (dx == 0 && dy == 0) {
+ return S2Earth::GetDistanceMeters(point_ll, start_ll);
+ }
+
+ double t = ((px - x1) * dx + (py - y1) * dy) / (dx * dx + dy * dy);
+ t = std::max(0.0, std::min(1.0, t));
+
+ double closest_x = x1 + t * dx;
+ double closest_y = y1 + t * dy;
+
+ S2LatLng closest_point = S2LatLng::FromDegrees(closest_y, closest_x);
+ return S2Earth::GetDistanceMeters(point_ll, closest_point);
+}
+
+// Helper function to compute distance from a point to a polyline
+double distance_point_to_polyline(const S2Point& point, const S2Polyline*
polyline) {
+ double min_distance = std::numeric_limits<double>::max();
+
+ for (int i = 0; i < polyline->num_vertices() - 1; ++i) {
+ const S2Point& p1 = polyline->vertex(i);
+ const S2Point& p2 = polyline->vertex(i + 1);
+
+ double dist = distance_point_to_segment(point, p1, p2);
+ min_distance = std::min(min_distance, dist);
+ }
+
+ return min_distance;
+}
+
+// Helper function to compute distance from a point to a polygon
+double distance_point_to_polygon(const S2Point& point, const S2Polygon*
polygon) {
+ // Check if point is inside polygon
+ if (polygon->Contains(point)) {
+ return 0.0;
+ }
+
+ // Find minimum distance to polygon boundary
+ double min_distance = std::numeric_limits<double>::max();
+
+ for (int i = 0; i < polygon->num_loops(); ++i) {
+ const S2Loop* loop = polygon->loop(i);
+
+ for (int j = 0; j < loop->num_vertices(); ++j) {
+ const S2Point& p1 = loop->vertex(j);
+ const S2Point& p2 = loop->vertex((j + 1) % loop->num_vertices());
+
+ double dist = distance_point_to_segment(point, p1, p2);
+ min_distance = std::min(min_distance, dist);
+ }
+ }
+
+ return min_distance;
+}
+
+double GeoPoint::Distance(const GeoShape* rhs) const {
+ // rhs is guaranteed to be valid by StDualGeoDoubleFunction
(functions_geo.cpp) which checks shapes[0] && shapes[1]
+ switch (rhs->type()) {
+ case GEO_SHAPE_POINT: {
+ const GeoPoint* point = static_cast<const GeoPoint*>(rhs);
+ S2LatLng this_ll = S2LatLng(*_point);
+ S2LatLng other_ll = S2LatLng(*point->point());
+ return S2Earth::GetDistanceMeters(this_ll, other_ll);
+ }
+ case GEO_SHAPE_LINE_STRING: {
+ const GeoLine* line = static_cast<const GeoLine*>(rhs);
+ return distance_point_to_polyline(*_point, line->polyline());
+ }
+ case GEO_SHAPE_POLYGON: {
+ const GeoPolygon* polygon = static_cast<const GeoPolygon*>(rhs);
+ return distance_point_to_polygon(*_point, polygon->polygon());
+ }
+ case GEO_SHAPE_CIRCLE: {
+ const GeoCircle* circle = static_cast<const GeoCircle*>(rhs);
+ S2LatLng this_ll = S2LatLng(*_point);
+ S2LatLng center_ll = S2LatLng(circle->circle()->center());
+ double dist_to_center = S2Earth::GetDistanceMeters(this_ll, center_ll);
+ double circle_radius = S2Earth::ToMeters(circle->circle()->radius());
+
+ // Distance from point to circle is distance to center minus radius
+ return std::max(0.0, dist_to_center - circle_radius);
+ }
+ case GEO_SHAPE_MULTI_POLYGON: {
+ return rhs->Distance(this); // Delegate to MultiPolygon's
implementation
+ }
+ default:
+ return -1.0;
+ }
+}
+
+double GeoLine::Distance(const GeoShape* rhs) const {
+ // rhs is guaranteed to be valid by StDualGeoDoubleFunction
(functions_geo.cpp) which checks shapes[0] && shapes[1]
+ switch (rhs->type()) {
+ case GEO_SHAPE_POINT: {
+ const GeoPoint* point = static_cast<const GeoPoint*>(rhs);
+ return distance_point_to_polyline(*point->point(), _polyline.get());
+ }
+ case GEO_SHAPE_LINE_STRING: {
Review Comment:
What if the lines cross?
##########
be/src/geo/st_distance.cpp:
##########
Review Comment:
The content of those new files could also be added directly to `geo_types.cpp`.
##########
regression-test/suites/nereids_p0/sql_functions/spatial_functions/test_gis_function.groovy:
##########
@@ -323,6 +323,85 @@ suite("test_gis_function") {
qt_sql "SELECT ST_ANGLE_SPHERE(116.35620117, 39.939093, 116.4274406433,
39.9020987219);"
qt_sql "SELECT ST_ANGLE_SPHERE(0, 0, 45, 0);"
+ // ST_Length tests for all geometry types
Review Comment:
Need more test cases that use data stored in tables.
##########
be/src/geo/st_length.cpp:
##########
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <s2/s2cap.h>
+#include <s2/s2earth.h>
+#include <s2/s2loop.h>
+#include <s2/s2polygon.h>
+#include <s2/s2polyline.h>
+#include <s2/util/units/length-units.h>
+
+#include <cmath>
+
+#include "geo/geo_types.h"
+
+namespace doris {
+
+double GeoPoint::Length() const {
+ // Point has no length
+ return 0.0;
+}
+
+double GeoLine::Length() const {
+ // GeoLine is always valid with at least 2 vertices (guaranteed by
constructor)
+ double total_length = 0.0;
+ for (int i = 0; i < _polyline->num_vertices() - 1; ++i) {
+ const S2Point& p1 = _polyline->vertex(i);
+ const S2Point& p2 = _polyline->vertex(i + 1);
+
+ S2LatLng lat_lng1(p1);
+ S2LatLng lat_lng2(p2);
+
+ // Calculate distance in meters using S2Earth
+ double distance = S2Earth::GetDistanceMeters(lat_lng1, lat_lng2);
+ total_length += distance;
+ }
+
+ return total_length;
+}
+
+double GeoPolygon::Length() const {
+ // GeoPolygon is always valid with at least one loop (guaranteed by
constructor)
+ double perimeter = 0.0;
+ const S2Loop* outer_loop = _polygon->loop(0);
Review Comment:
Why only consider loop 0? What about computing with the other loops, i.e. the holes?
##########
be/src/geo/st_distance.cpp:
##########
@@ -0,0 +1,320 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <s2/s2cap.h>
+#include <s2/s2earth.h>
+#include <s2/s2loop.h>
+#include <s2/s2polygon.h>
+#include <s2/s2polyline.h>
+
+#include <cmath>
+#include <limits>
+
+#include "geo/geo_types.h"
+
+namespace doris {
+
+// Helper function to compute distance from a point to a line segment
+double distance_point_to_segment(const S2Point& point, const S2Point&
line_start,
+ const S2Point& line_end) {
+ S2LatLng point_ll = S2LatLng(point);
+ S2LatLng start_ll = S2LatLng(line_start);
+ S2LatLng end_ll = S2LatLng(line_end);
+
+ double px = point_ll.lng().degrees();
+ double py = point_ll.lat().degrees();
+ double x1 = start_ll.lng().degrees();
+ double y1 = start_ll.lat().degrees();
+ double x2 = end_ll.lng().degrees();
+ double y2 = end_ll.lat().degrees();
+
+ double dx = x2 - x1;
+ double dy = y2 - y1;
+
+ if (dx == 0 && dy == 0) {
+ return S2Earth::GetDistanceMeters(point_ll, start_ll);
+ }
+
+ double t = ((px - x1) * dx + (py - y1) * dy) / (dx * dx + dy * dy);
Review Comment:
Is this a planar projection? I think lat and lng have no absolute relation
with distance, so maybe directly use S2Earth's functions?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]