This is an automated email from the ASF dual-hosted git repository.
csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 780e6683a IMPALA-14573: port critical geospatial functions to c++
(part 1)
780e6683a is described below
commit 780e6683a21dae3622744a82f92e155ae06e13f2
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Thu Nov 20 20:57:25 2025 +0100
IMPALA-14573: port critical geospatial functions to c++ (part 1)
This commit contains the simpler parts from
https://gerrit.cloudera.org/#/c/20602
This mainly means accessors for the header of the binary
format and bounding box check (st_envIntersects).
New tests for not yet covered functions / overloads are also added.
For details of the binary format see be/src/exprs/geo/shape-format.h
Differences from the PR above:
Only a subset of functions is added. The criteria were:
1. the native function must be fully compatible with the Java version*
2. must not rely on (de)serializing the full geometry
3. the function must be tested
Criterion 1 implies criterion 2 because (de)serialization is not implemented yet in
the original patch for >2d geometries, which would break compatibility
with the Java version for XYZ/XYM/XYZM geometries.
*: there are 2 known differences:
1. NULL handling: the Java functions return an error instead of NULL
when getting a NULL parameter
2. st_envIntersects() doesn't check if the SRID matches - the Java
library looks inconsistent about this
Because the native functions are fairly safe replacements for the Java
ones, they are always used when geospatial_library=HIVE_ESRI.
Change-Id: I0ff950a25320549290a83a3b1c31ce828dd68e3c
Reviewed-on: http://gerrit.cloudera.org:8080/23700
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/CMakeLists.txt | 1 +
be/src/codegen/CMakeLists.txt | 2 +-
be/src/codegen/impala-ir.cc | 1 +
be/src/exprs/CMakeLists.txt | 1 +
.../src/exprs/geo/CMakeLists.txt | 25 +-
be/src/exprs/geo/common.h | 49 +++
be/src/exprs/geo/geospatial-functions-ir.cc | 112 ++++++
be/src/exprs/geo/geospatial-functions.h | 58 +++
be/src/exprs/geo/shape-format.h | 294 +++++++++++++++
be/src/exprs/scalar-expr-evaluator.cc | 2 +
.../impala/compat/HiveEsriGeospatialBuiltins.java | 72 +++-
.../queries/QueryTest/geospatial-esri-extra.test | 394 +++++++++++++++++++++
.../queries/QueryTest/geospatial-esri.test | 1 +
tests/custom_cluster/test_geospatial_library.py | 3 +
tests/query_test/test_geospatial_functions.py | 15 +
15 files changed, 1009 insertions(+), 21 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 12af823db..cd13365bd 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -531,6 +531,7 @@ set (IMPALA_LIBS
ExecPaimon
Exprs
ExprsIr
+ ExprsGeoIr
GlobalFlags
histogram_proto
ImpalaThrift
diff --git a/be/src/codegen/CMakeLists.txt b/be/src/codegen/CMakeLists.txt
index 327b7c4a4..04fa91fe6 100644
--- a/be/src/codegen/CMakeLists.txt
+++ b/be/src/codegen/CMakeLists.txt
@@ -77,7 +77,7 @@ function(COMPILE_TO_IR_C_ARRAY IR_C_FILE VARNAME)
${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_TMP_OUTPUT_FILE}
COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} <
${IR_TMP_OUTPUT_FILE} > ${IR_OUTPUT_FILE}
COMMAND rm ${IR_TMP_OUTPUT_FILE}
- DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr
${IR_INPUT_FILES}
+ DEPENDS ExecIr ExecAvroIr ExprsGeoIr ExecKuduIr ExprsIr RuntimeIr UdfIr
UtilIr ${IR_INPUT_FILES}
)
# Convert LLVM bytecode to C array.
diff --git a/be/src/codegen/impala-ir.cc b/be/src/codegen/impala-ir.cc
index 81248f8ba..256d2945a 100644
--- a/be/src/codegen/impala-ir.cc
+++ b/be/src/codegen/impala-ir.cc
@@ -49,6 +49,7 @@
#include "exprs/date-functions-ir.cc"
#include "exprs/decimal-functions-ir.cc"
#include "exprs/decimal-operators-ir.cc"
+#include "exprs/geo/geospatial-functions-ir.cc"
#include "exprs/hive-udf-call-ir.cc"
#include "exprs/iceberg-functions-ir.cc"
#include "exprs/in-predicate-ir.cc"
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 513ba4736..560dbceff 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+add_subdirectory(geo)
# where to put generated libraries
set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs")
diff --git a/tests/query_test/test_geospatial_functions.py
b/be/src/exprs/geo/CMakeLists.txt
similarity index 54%
copy from tests/query_test/test_geospatial_functions.py
copy to be/src/exprs/geo/CMakeLists.txt
index acaad7147..9fccd08da 100644
--- a/tests/query_test/test_geospatial_functions.py
+++ b/be/src/exprs/geo/CMakeLists.txt
@@ -15,18 +15,19 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import absolute_import, division, print_function
-from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import SkipIfApacheHive
+# where to put generated libraries
+set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/geo")
+# where to put generated binaries
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/geo")
-class TestGeospatialFuctions(ImpalaTestSuite):
- """Tests the geospatial builtin functions"""
- @SkipIfApacheHive.feature_not_supported
- def test_esri_geospatial_functions(self, vector):
- self.run_test_case('QueryTest/geospatial-esri', vector)
+add_library(ExprsGeoIr
+ geospatial-functions-ir.cc
+)
+add_dependencies(ExprsGeoIr gen-deps)
- def test_esri_geospatial_planner(self, vector):
- # These tests are not among planner tests because with default flags
- # geospatial builtin functions are not loaded.
- self.run_test_case('QueryTest/geospatial-esri-planner', vector)
+if (BUILD_WITH_NO_TESTS)
+ return()
+endif()
+
+# Add tests here.
diff --git a/be/src/exprs/geo/common.h b/be/src/exprs/geo/common.h
new file mode 100644
index 000000000..08651ca7f
--- /dev/null
+++ b/be/src/exprs/geo/common.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "udf/udf.h"
+#include "util/bit-util.h"
+
+namespace impala::geo {
+
+using impala_udf::FunctionContext;
+using impala_udf::StringVal;
+
+// see
https://github.com/Esri/spatial-framework-for-hadoop/blob/v2.2.0/hive/src/main/java/com/esri/hadoop/hive/GeometryUtils.java#L21
+enum OGCType {
+ UNKNOWN = 0,
+ ST_POINT = 1,
+ ST_LINESTRING = 2,
+ ST_POLYGON = 3,
+ ST_MULTIPOINT = 4,
+ ST_MULTILINESTRING = 5,
+ ST_MULTIPOLYGON = 6
+};
+
+constexpr std::array<const char*, ST_MULTIPOLYGON + 1> OGCTypeToStr = {{
+ "UNKNOWN",
+ "ST_POINT",
+ "ST_LINESTRING",
+ "ST_POLYGON",
+ "ST_MULTIPOINT",
+ "ST_MULTILINESTRING",
+ "ST_MULTIPOLYGON"
+}};
+
+} // namespace impala
diff --git a/be/src/exprs/geo/geospatial-functions-ir.cc
b/be/src/exprs/geo/geospatial-functions-ir.cc
new file mode 100644
index 000000000..aa1ae6099
--- /dev/null
+++ b/be/src/exprs/geo/geospatial-functions-ir.cc
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/geo/geospatial-functions.h"
+
+#include "exprs/geo/common.h"
+#include "exprs/geo/shape-format.h"
+#include "runtime/string-value.inline.h"
+#include "udf/udf-internal.h"
+#include "udf/udf.h"
+
+#include "common/names.h"
+
+namespace impala::geo {
+
+// Accessors
+
+DoubleVal GeospatialFunctions::st_X(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return DoubleVal::null();
+ if (ogc_type != ST_POINT) return DoubleVal::null(); // Only valid for
ST_POINT.
+ return DoubleVal(getMinX(geom));
+}
+
+DoubleVal GeospatialFunctions::st_Y(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return DoubleVal::null();
+ if (ogc_type != ST_POINT) return DoubleVal::null(); // Only valid for
ST_POINT.
+ return DoubleVal(getMinY(geom));
+}
+
+DoubleVal GeospatialFunctions::st_MinX(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return DoubleVal::null();
+ return DoubleVal(getMinX(geom));
+}
+
+DoubleVal GeospatialFunctions::st_MinY(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return DoubleVal::null();
+ return DoubleVal(getMinY(geom));
+}
+
+DoubleVal GeospatialFunctions::st_MaxX(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return DoubleVal::null();
+ if (ogc_type == ST_POINT) return DoubleVal(getMinX(geom));
+ return DoubleVal(getMaxX(geom));
+}
+
+DoubleVal GeospatialFunctions::st_MaxY(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return DoubleVal::null();
+ if (ogc_type == ST_POINT) return DoubleVal(getMinY(geom));
+ return DoubleVal(getMaxY(geom));
+}
+
+StringVal GeospatialFunctions::st_GeometryType(FunctionContext* ctx,
+ const StringVal& geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return StringVal::null();
+ const char* name = getGeometryType(ogc_type);
+
+ return StringVal(name);
+}
+
+IntVal GeospatialFunctions::st_Srid(FunctionContext* ctx, const StringVal&
geom) {
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return IntVal::null();
+ return getSrid(geom);
+}
+
+StringVal GeospatialFunctions::st_SetSrid(FunctionContext* ctx, const
StringVal& geom,
+ const IntVal& srid) {
+ if (srid.is_null) return geom;
+ OGCType ogc_type;
+ if (!ParseHeader(ctx, geom, &ogc_type)) return StringVal::null();
+
+ StringVal res = StringVal::CopyFrom(ctx, geom.ptr, geom.len);
+ setSrid(res, srid.val);
+ return res;
+}
+
+// Predicates
+
+BooleanVal GeospatialFunctions::st_EnvIntersects(
+ FunctionContext* ctx, const StringVal& lhs_geom,const StringVal& rhs_geom)
{
+ OGCType lhs_type, rhs_type;
+ // TODO: compare srid? The ESRI UDF does it, but it is not done in other
relations:
+ //
https://github.com/apache/hive/blob/rel/release-4.2.0/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_EnvIntersects.java#L63
+ if (!ParseHeader(ctx, lhs_geom, &lhs_type) || !ParseHeader(ctx, rhs_geom,
&rhs_type)) {
+ return BooleanVal::null();
+ }
+ bool result = bBoxIntersects(lhs_geom, rhs_geom, lhs_type, rhs_type);
+ return BooleanVal(result);
+}
+
+}
diff --git a/be/src/exprs/geo/geospatial-functions.h
b/be/src/exprs/geo/geospatial-functions.h
new file mode 100644
index 000000000..66110f843
--- /dev/null
+++ b/be/src/exprs/geo/geospatial-functions.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+
+#include "common/status.h"
+#include "udf/udf.h"
+
+namespace impala::geo {
+
+using impala_udf::FunctionContext;
+using impala_udf::BooleanVal;
+using impala_udf::IntVal;
+using impala_udf::BigIntVal;
+using impala_udf::DoubleVal;
+using impala_udf::StringVal;
+
+class Expr;
+class OpcodeRegistry;
+struct StringValue;
+class TupleRow;
+
+class GeospatialFunctions {
+ public:
+ // Accessors
+ static DoubleVal st_X(FunctionContext* ctx, const StringVal& geom);
+ static DoubleVal st_Y(FunctionContext* ctx, const StringVal& geom);
+ static DoubleVal st_MinX(FunctionContext* ctx, const StringVal& geom);
+ static DoubleVal st_MinY(FunctionContext* ctx, const StringVal& geom);
+ static DoubleVal st_MaxX(FunctionContext* ctx, const StringVal& geom);
+ static DoubleVal st_MaxY(FunctionContext* ctx, const StringVal& geom);
+ static StringVal st_GeometryType(FunctionContext* ctx, const StringVal&
geom);
+ static IntVal st_Srid(FunctionContext* ctx, const StringVal& geom);
+ static StringVal st_SetSrid(FunctionContext* ctx, const StringVal& geom,
+ const IntVal& srid);
+
+ // Predicates
+ static BooleanVal st_EnvIntersects(
+ FunctionContext* ctx, const StringVal& lhs,const StringVal& rhs);
+};
+
+}// namespace impala
diff --git a/be/src/exprs/geo/shape-format.h b/be/src/exprs/geo/shape-format.h
new file mode 100644
index 000000000..a7af82845
--- /dev/null
+++ b/be/src/exprs/geo/shape-format.h
@@ -0,0 +1,294 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "exprs/geo/common.h"
+
+namespace impala::geo {
+
+// This file is responsible for handling the header of the "esri shape" format
used for
+// geometries encoded as BINARY. This format is fully compatible with Java
framework
+// https://github.com/Esri/spatial-framework-for-hadoop
+// A 5 byte "OGC" header followed by the same format as the one used in shape
files:
+//
https://www.esri.com/content/dam/esrisites/sitecore-archive/Files/Pdfs/library/whitepapers/pdfs/shapefile.pdf
+//
+// The OGC header contains:
+// - 4 byte big endian SRID (reference system id)
+// - 1 byte type id (OGCType)
+// - no padding
+// see
https://github.com/Esri/spatial-framework-for-hadoop/blob/v2.2.0/hive/src/main/java/com/esri/hadoop/hive/GeometryUtils.java#L16
+//
+// The header of the shape file format ("ESRI header") contains:
+// - 4 byte type id
+// for POINT type store the coordinates:
+// - dimension * 8 byte to store x/y/z/m as doubles
+// for other types store the bounding box:
+// - min coordinates: 2 * 8 byte to store x/y as doubles
+// - max coordinates: 2 * 8 byte to store x/y as doubles
+// - min/max z/m are stored later in the headers - this file doesn't access
those
+// - no padding
+//
+// For some types this is followed by a variable length part, which is not
handled here.
+// A POC example for handling a full type:
+//
https://gerrit.cloudera.org/#/c/20602/6/be/src/exprs/geo/poly-line-shape-format.cc
+//
+// Currently only 2 dimensions are handled (xy), min/max for z/m has to be
accessed
+// with Java functions. The xy bounding box has the same offset and format in
+// xyz/xym/xyzm geometries so x/y accessors work in this case too.
+//
+// Functions are defined in the header to allow inlining bounding box check in
codegen.
+
+constexpr int SRID_SIZE = 4;
+constexpr int OGC_TYPE_SIZE = 1;
+
+constexpr int SRID_OFFSET = 0;
+constexpr int OGC_TYPE_OFFSET = 4;
+
+static_assert(OGC_TYPE_OFFSET == SRID_SIZE);
+
+
+constexpr int ESRI_TYPE_SIZE = 4;
+constexpr int ESRI_TYPE_OFFSET = 5;
+
+constexpr int X1_OFFSET = 9;
+constexpr int Y1_OFFSET = X1_OFFSET + sizeof(double);
+constexpr int X2_OFFSET = Y1_OFFSET + sizeof(double);
+constexpr int Y2_OFFSET = X2_OFFSET + sizeof(double);
+
+constexpr int MIN_GEOM_SIZE = 9;
+constexpr int MIN_POINT_SIZE = 25;
+constexpr int MIN_NON_POINT_SIZE = 41;
+
+static_assert(ESRI_TYPE_OFFSET == OGC_TYPE_OFFSET + OGC_TYPE_SIZE);
+static_assert(X1_OFFSET == ESRI_TYPE_OFFSET + ESRI_TYPE_SIZE);
+static_assert(MIN_GEOM_SIZE == SRID_SIZE + OGC_TYPE_SIZE + ESRI_TYPE_SIZE);
+static_assert(MIN_POINT_SIZE == MIN_GEOM_SIZE + 2 * sizeof(double));
+static_assert(MIN_NON_POINT_SIZE == MIN_POINT_SIZE + 2 * sizeof(double));
+
+// See
https://github.com/Esri/geometry-api-java/blob/v2.2.4/src/main/java/com/esri/core/geometry/ShapeType.java#L27
+enum EsriType: uint32_t {
+ ShapeNull = 0,
+ ShapePoint = 1,
+ ShapePointM = 21,
+ ShapePointZM = 11,
+ ShapePointZ = 9,
+ ShapeMultiPoint = 8,
+ ShapeMultiPointM = 28,
+ ShapeMultiPointZM = 18,
+ ShapeMultiPointZ = 20,
+ ShapePolyline = 3,
+ ShapePolylineM = 23,
+ ShapePolylineZM = 13,
+ ShapePolylineZ = 10,
+ ShapePolygon = 5,
+ ShapePolygonM = 25,
+ ShapePolygonZM = 15,
+ ShapePolygonZ = 19,
+ ShapeMultiPatchM = 31,
+ ShapeMultiPatch = 32,
+ ShapeGeneralPolyline = 50,
+ ShapeGeneralPolygon = 51,
+ ShapeGeneralPoint = 52,
+ ShapeGeneralMultiPoint = 53,
+ ShapeGeneralMultiPatch = 54,
+ ShapeTypeLast = 55
+};
+
+constexpr std::array<EsriType, ST_MULTIPOLYGON + 1> OGCTypeToEsriType = {{
+ ShapeNull, // UNKNOWN
+ ShapePoint, // ST_POINT
+ ShapePolyline, // ST_LINESTRING
+ ShapePolygon, // ST_POLYGON
+ ShapeMultiPoint, // ST_MULTIPOINT
+ ShapePolyline, // ST_MULTILINESTRING
+ ShapePolygon // ST_MULTIPOLYGON
+}};
+
+template <class T>
+T readFromGeom(const StringVal& geom, int offset) {
+ DCHECK_GE(geom.len, offset + sizeof(T));
+ return *reinterpret_cast<T*>(geom.ptr + offset);
+}
+
+template <class T>
+void writeToGeom(const T& val, StringVal& geom, int offset) {
+ DCHECK_GE(geom.len, offset + sizeof(T));
+ T* ptr = reinterpret_cast<T*>(geom.ptr + offset);
+ *ptr = val;
+}
+
+// getters/setters for OGC header:
+
+inline uint32_t getSrid(const StringVal& geom) {
+ static_assert(SRID_SIZE == sizeof(uint32_t));
+
+ // SRID is in big endian format in 'geom', but Impala only supports little
endian so we
+ // have to convert it.
+#ifndef IS_LITTLE_ENDIAN
+ static_assert(false, "Only the little endian byte order is supported.");
+#endif
+ const uint32_t srid_bytes = readFromGeom<uint32_t>(geom, SRID_OFFSET);
+ return BitUtil::ByteSwap(srid_bytes);
+}
+
+inline OGCType getOGCType(const StringVal& geom) {
+ static_assert(OGC_TYPE_SIZE == sizeof(char));
+ const char res = readFromGeom<char>(geom, OGC_TYPE_OFFSET);
+ return static_cast<OGCType>(res);
+}
+
+inline constexpr const char* getGeometryType(OGCType ogc_type) {
+ return OGCTypeToStr[ogc_type];
+}
+
+inline void setSrid(StringVal& geom, uint32_t srid) {
+ static_assert(SRID_SIZE == sizeof(uint32_t));
+
+ // SRID is in big endian format in 'geom', but Impala only supports little
endian so we
+ // have to convert it.
+#ifndef IS_LITTLE_ENDIAN
+ static_assert(false, "Only the little endian byte order is supported.");
+#endif
+ const uint32_t srid_bytes = BitUtil::ByteSwap(srid);
+ writeToGeom<uint32_t>(srid_bytes, geom, SRID_OFFSET);
+}
+
+inline void setOGCType(StringVal& geom, OGCType ogc_type) {
+ writeToGeom<char>(ogc_type, geom, OGC_TYPE_OFFSET);
+}
+
+// getters/setters for ESRI header:
+
+inline EsriType getEsriType(const StringVal& geom) {
+ static_assert(ESRI_TYPE_SIZE == sizeof(EsriType));
+ return readFromGeom<EsriType>(geom, ESRI_TYPE_OFFSET);
+}
+
+inline double getMinX(const StringVal& geom) {
+ return readFromGeom<double>(geom, X1_OFFSET);
+}
+
+inline double getMinY(const StringVal& geom) {
+ return readFromGeom<double>(geom, Y1_OFFSET);
+}
+
+inline double getMaxX(const StringVal& geom) {
+ return readFromGeom<double>(geom, X2_OFFSET);
+}
+
+inline double getMaxY(const StringVal& geom) {
+ return readFromGeom<double>(geom, Y2_OFFSET);
+}
+
+inline void setEsriType(StringVal& geom, EsriType esri_type) {
+ static_assert(ESRI_TYPE_SIZE == sizeof(EsriType));
+ writeToGeom<EsriType>(esri_type, geom, ESRI_TYPE_OFFSET);
+}
+
+inline void setMinX(StringVal& geom, double x) {
+ writeToGeom<double>(x, geom, X1_OFFSET);
+}
+
+inline void setMinY(StringVal& geom, double y) {
+ writeToGeom<double>(y, geom, Y1_OFFSET);
+}
+
+inline void setMaxX(StringVal& geom, double x) {
+ writeToGeom<double>(x, geom, X2_OFFSET);
+}
+
+inline void setMaxY(StringVal& geom, double y) {
+ writeToGeom<double>(y, geom, Y2_OFFSET);
+}
+
+// Validate header and get type
+inline bool ParseHeader(FunctionContext* ctx, const StringVal& geom, OGCType*
ogc_type) {
+ DCHECK(ogc_type != nullptr);
+
+ if (UNLIKELY(geom.is_null)) return false;
+
+ if (UNLIKELY(geom.len < MIN_GEOM_SIZE)) {
+ ctx->SetError("Geometry size too small.");
+ return false;
+ }
+
+ const OGCType unchecked_ogc_type = getOGCType(geom);
+ if (UNLIKELY(unchecked_ogc_type < UNKNOWN || unchecked_ogc_type >
ST_MULTIPOLYGON)) {
+ ctx->SetError("Invalid geometry type.");
+ return false;
+ }
+
+ if (UNLIKELY(unchecked_ogc_type == UNKNOWN)) {
+ ctx->SetError("Geometry type UNKNOWN.");
+ return false;
+ }
+
+ if (UNLIKELY(unchecked_ogc_type == ST_POINT)) {
+ if (geom.len < MIN_POINT_SIZE) {
+ ctx->SetError("Geometry size too small for ST_POINT type.");
+ return false;
+ }
+ } else {
+ if (UNLIKELY(geom.len < MIN_NON_POINT_SIZE)) {
+ ctx->SetError("Geometry size too small for non ST_POINT type.");
+ return false;
+ }
+ }
+
+ // TODO: fix Z/M/ZM types and move to a function called from DCHECK
+ // ogc vs ESRI type checking can be useful during development, but it
+ // is unnecessary overhead in production
+ /*const EsriType esri_type = getEsriType(geom);
+ DCHECK_LT(unchecked_ogc_type, OGCTypeToEsriType.size());
+ const EsriType expected_esri_type = OGCTypeToEsriType[unchecked_ogc_type];
+ if (expected_esri_type != esri_type) {
+ // TODO: To test it we need to create a table with 3D types, we cannot
create them
+ // with native constructors.
+ ctx->SetError(strings::Substitute(
+ "Invalid geometry: OGCType and EsriType do not match. "
+ "Because the OGCType is $0, expected EsriType $1, found $2.",
+ OGCTypeToStr[unchecked_ogc_type], expected_esri_type,
esri_type).c_str());
+ }*/
+
+ *ogc_type = static_cast<OGCType>(unchecked_ogc_type);
+ return true;
+}
+
+// Bounding box check for x/y coordinates of two geometries. z/m are ignored,
which
+// is consistent with the original Java functions.
+inline bool bBoxIntersects(const StringVal& lhs_geom, const StringVal rhs_geom,
+ OGCType lhs_type, OGCType rhs_type) {
+ bool is_lhs_point = lhs_type == ST_POINT;
+ double xmin1 = getMinX(lhs_geom);
+ double ymin1 = getMinY(lhs_geom);
+ double xmax1 = is_lhs_point ? xmin1 : getMaxX(lhs_geom);
+ double ymax1 = is_lhs_point ? ymin1 : getMaxY(lhs_geom);
+
+ bool is_rhs_point = rhs_type == ST_POINT;
+ double xmin2 = getMinX(rhs_geom);
+ double ymin2 = getMinY(rhs_geom);
+ double xmax2 = is_rhs_point ? xmin2 : getMaxX(rhs_geom);
+ double ymax2 = is_rhs_point ? ymin2 : getMaxY(rhs_geom);
+
+ if (xmax1 < xmin2 || xmax2 < xmin1 || ymax1 < ymin2 || ymax2 < ymin1 )
return false;
+ return true;
+}
+
+} // namespace impala
diff --git a/be/src/exprs/scalar-expr-evaluator.cc
b/be/src/exprs/scalar-expr-evaluator.cc
index e4d9cb3fd..0f0754f06 100644
--- a/be/src/exprs/scalar-expr-evaluator.cc
+++ b/be/src/exprs/scalar-expr-evaluator.cc
@@ -33,6 +33,7 @@
#include "exprs/date-functions.h"
#include "exprs/decimal-functions.h"
#include "exprs/decimal-operators.h"
+#include "exprs/geo/geospatial-functions.h"
#include "exprs/hive-udf-call.h"
#include "exprs/iceberg-functions.h"
#include "exprs/in-predicate.h"
@@ -459,6 +460,7 @@ void ScalarExprEvaluator::InitBuiltinsDummy() {
DataSketchesFunctions::DsHllEstimate(nullptr, StringVal::null());
DecimalFunctions::Precision(nullptr, DecimalVal::null());
DecimalOperators::CastToDecimalVal(nullptr, DecimalVal::null());
+ geo::GeospatialFunctions::st_MaxX(nullptr, StringVal::null());
IcebergFunctions::TruncatePartitionTransform(nullptr, IntVal::null(),
IntVal::null());
InPredicate::InIterate(nullptr, BigIntVal::null(), 0, nullptr);
IsNullPredicate::IsNull(nullptr, BooleanVal::null());
diff --git
a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
index 578dd6d0f..048958e1e 100644
---
a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
+++
b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
@@ -48,42 +48,67 @@ import org.apache.impala.catalog.Type;
import
org.apache.impala.hive.executor.BinaryToBinaryHiveLegacyFunctionExtractor;
import org.apache.impala.hive.executor.HiveJavaFunction;
import org.apache.impala.hive.executor.HiveLegacyJavaFunction;
+import org.apache.impala.service.BackendConfig;
import com.google.common.base.Preconditions;
import org.apache.impala.analysis.FunctionName;
import org.apache.impala.thrift.TFunctionBinaryType;
+import org.apache.impala.thrift.TGeospatialLibrary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HiveEsriGeospatialBuiltins {
+ private final static Logger LOG = LoggerFactory.getLogger(
+ HiveEsriGeospatialBuiltins.class);
+
/**
* Initializes Hive's ESRI geospatial UDFs as builtins.
*/
public static void initBuiltins(Db db) {
- addLegacyUDFs(db);
+ TGeospatialLibrary lib = BackendConfig.INSTANCE.getGeospatialLibrary();
+ // Currently all native functions are expected to be 100% compatible with
the Java
+ // version. This is not true for the full function set of
+ // https://gerrit.cloudera.org/#/c/20602/ , which sets this based on a flag to
allow
+ // full compatibility with Hive.
+ boolean addNatives = true;
+ addLegacyUDFs(db, addNatives);
addGenericUDFs(db);
addVarargsUDFs(db);
+ if(addNatives) {
+ addNatives(db);
+ }
}
- private static void addLegacyUDFs(Db db) {
+ private static void addLegacyUDFs(Db db, boolean addNatives) {
List<UDF> legacyUDFs = Arrays.asList(new ST_Area(), new ST_AsBinary(),
new ST_AsGeoJson(), new ST_AsJson(), new ST_AsShape(), new ST_AsText(),
new ST_Boundary(), new ST_Buffer(), new ST_Centroid(), new
ST_CoordDim(),
new ST_Difference(), new ST_Dimension(), new ST_Distance(), new
ST_EndPoint(),
- new ST_Envelope(), new ST_EnvIntersects(), new ST_ExteriorRing(),
+ new ST_Envelope(), new ST_ExteriorRing(),
new ST_GeodesicLengthWGS84(), new ST_GeomCollection(), new
ST_GeometryN(),
- new ST_GeometryType(), new ST_GeomFromShape(), new ST_GeomFromText(),
+ new ST_GeomFromShape(), new ST_GeomFromText(),
new ST_GeomFromWKB(), new ST_InteriorRingN(), new ST_Intersection(),
new ST_Is3D(), new ST_IsClosed(), new ST_IsEmpty(), new
ST_IsMeasured(),
new ST_IsRing(), new ST_IsSimple(), new ST_Length(), new
ST_LineFromWKB(),
- new ST_M(), new ST_MaxM(), new ST_MaxX(), new ST_MaxY(), new ST_MaxZ(),
- new ST_MinM(), new ST_MinX(), new ST_MinY(), new ST_MinZ(), new
ST_MLineFromWKB(),
+ new ST_M(), new ST_MaxM(), new ST_MaxZ(),
+ new ST_MinM(), new ST_MinZ(), new ST_MLineFromWKB(),
new ST_MPointFromWKB(), new ST_MPolyFromWKB(), new ST_NumGeometries(),
new ST_NumInteriorRing(), new ST_NumPoints(), new ST_Point(),
new ST_PointFromWKB(), new ST_PointN(), new ST_PointZ(), new
ST_PolyFromWKB(),
- new ST_Relate(), new ST_SRID(), new ST_StartPoint(), new
ST_SymmetricDiff(),
- new ST_X(), new ST_Y(), new ST_Z(), new ST_SetSRID());
+ new ST_Relate(), new ST_StartPoint(), new ST_SymmetricDiff(),
+ new ST_Z());
+
+ List<UDF> legacyUDFsWithNativeImplementation = Arrays.asList(
+ new ST_EnvIntersects(), new ST_GeometryType(),
+ new ST_MaxX(), new ST_MaxY(),
+ new ST_MinX(), new ST_MinY(),
+ new ST_SRID(), new ST_SetSRID(),
+ new ST_X(), new ST_Y()
+ );
+ if (!addNatives) {
+ legacyUDFs.addAll(legacyUDFsWithNativeImplementation);
+ }
for (UDF udf : legacyUDFs) {
for (Function fn : extractFromLegacyHiveBuiltin(udf, db.getName())) {
@@ -206,4 +231,35 @@ public class HiveEsriGeospatialBuiltins {
})
.collect(Collectors.toList());
}
+
+ private static void addNative(Db db, String fnNameBase, String fnNameSuffix,
+ boolean varArgs, Type retType, Type... argTypes) {
+ String udfName = fnNameBase.toLowerCase();
+ String geospatialFnPrefix = "impala::geo::GeospatialFunctions::";
+ String cppSymbolName = geospatialFnPrefix + fnNameBase + fnNameSuffix;
+
+ db.addScalarBuiltin(udfName, cppSymbolName, true, varArgs, retType,
argTypes);
+ }
+
+ private static void addNative(Db db, String fnName, boolean varArgs, Type
retType,
+ Type... argTypes) {
+ addNative(db, fnName, "", varArgs, retType, argTypes);
+ }
+
+ private static void addNatives(Db db) {
+ // Legacy UDFs.
+ // Accessors.
+ addNative(db, "st_MinX", false, Type.DOUBLE, Type.BINARY);
+ addNative(db, "st_MaxX", false, Type.DOUBLE, Type.BINARY);
+ addNative(db, "st_MinY", false, Type.DOUBLE, Type.BINARY);
+ addNative(db, "st_MaxY", false, Type.DOUBLE, Type.BINARY);
+ addNative(db, "st_X", false, Type.DOUBLE, Type.BINARY);
+ addNative(db, "st_Y", false, Type.DOUBLE, Type.BINARY);
+ addNative(db, "st_Srid", false, Type.INT, Type.BINARY);
+ addNative(db, "st_SetSrid", false, Type.BINARY, Type.BINARY, Type.INT);
+ addNative(db, "st_GeometryType", false, Type.STRING, Type.BINARY);
+
+ // Predicates.
+ addNative(db, "st_EnvIntersects", false, Type.BOOLEAN, Type.BINARY,
Type.BINARY);
+ }
}
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-extra.test
b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-extra.test
new file mode 100644
index 000000000..a07fbef46
--- /dev/null
+++
b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-extra.test
@@ -0,0 +1,394 @@
+=====
+---- QUERY
+select st_bin(1, "point empty")
+---- TYPES
+BIGINT
+---- RESULTS
+0
+====
+---- QUERY
+select st_bin(1, "point(10 10)")
+---- TYPES
+BIGINT
+---- RESULTS
+4611685985093119520
+====
+---- QUERY
+select st_bin(1.0, st_point(10, 10))
+---- TYPES
+BIGINT
+---- RESULTS
+4611685985093119520
+====
+---- QUERY
+select st_bin(1.0, "point(-200 50)")
+---- TYPES
+BIGINT
+---- RESULTS
+4611685863613099350
+====
+---- QUERY
+select st_bin(1, st_point(-200, 50))
+---- TYPES
+BIGINT
+---- RESULTS
+4611685863613099350
+====
+---- QUERY
+select st_bin(1, "point(-500 -1000)")
+---- TYPES
+BIGINT
+---- RESULTS
+4611689052463623000
+====
+---- QUERY
+select st_bin(1.0, st_point(-500, -1000))
+---- TYPES
+BIGINT
+---- RESULTS
+4611689052463623000
+====
+---- QUERY
+select st_bin(1.0, "point(800 -5000)")
+---- TYPES
+BIGINT
+---- RESULTS
+4611701200465620300
+====
+---- QUERY
+select st_bin(1, st_point(800, -5000))
+---- TYPES
+BIGINT
+---- RESULTS
+4611701200465620300
+====
+
+---- QUERY
+select st_astext(st_binenvelope(1, 4611685985093119520));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((9.5 9.5, 10.5 9.5, 10.5 10.5, 9.5 10.5, 9.5 9.5))'
+====
+---- QUERY
+select st_astext(st_binenvelope(1.0, 4611685985093119520));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((9.5 9.5, 10.5 9.5, 10.5 10.5, 9.5 10.5, 9.5 9.5))'
+====
+
+---- QUERY
+select st_astext(st_binenvelope(1, 4611685863613099350));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((-200.5 49.5, -199.5 49.5, -199.5 50.5, -200.5 50.5, -200.5 49.5))'
+====
+---- QUERY
+select st_astext(st_binenvelope(1.0, 4611685863613099350));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((-200.5 49.5, -199.5 49.5, -199.5 50.5, -200.5 50.5, -200.5 49.5))'
+====
+
+---- QUERY
+select st_astext(st_binenvelope(1, 4611689052463623000));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((-500.5 -1000.5, -499.5 -1000.5, -499.5 -999.5, -500.5 -999.5,
-500.5 -1000.5))'
+====
+---- QUERY
+select st_astext(st_binenvelope(1.0, 4611689052463623000));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((-500.5 -1000.5, -499.5 -1000.5, -499.5 -999.5, -500.5 -999.5,
-500.5 -1000.5))'
+====
+
+---- QUERY
+select st_astext(st_binenvelope(1, 4611701200465620300));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((799.5 -5000.5, 800.5 -5000.5, 800.5 -4999.5, 799.5 -4999.5, 799.5
-5000.5))'
+====
+---- QUERY
+select st_astext(st_binenvelope(1.0, 4611701200465620300));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((799.5 -5000.5, 800.5 -5000.5, 800.5 -4999.5, 799.5 -4999.5, 799.5
-5000.5))'
+====
+
+---- QUERY
+select st_astext(st_binenvelope(1, st_point(1, 2)));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
+====
+---- QUERY
+select st_astext(st_binenvelope(1.0, st_point(1, 2)));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
+====
+
+---- QUERY
+select st_astext(st_binenvelope(1, "point(1 2)"));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
+====
+---- QUERY
+select st_astext(st_binenvelope(1.0, "point(1 2)"));
+---- TYPES
+STRING
+---- RESULTS
+'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
+====
+---- QUERY
+# ST_Intersects(BINARY, STRING)
+select ST_Intersects(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Intersects(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Intersects(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,true
+====
+---- QUERY
+# ST_Intersects(STRING, BINARY)
+select ST_Intersects("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Intersects("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Intersects("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4,
4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,true
+====
+---- QUERY
+# ST_Intersects(STRING, STRING)
+select ST_Intersects("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Intersects("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4,
4 1))"),
+ ST_Intersects("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,true
+====
+---- QUERY
+# ST_Overlaps(BINARY, STRING)
+select ST_Overlaps(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Overlaps(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Overlaps(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,false
+====
+---- QUERY
+# ST_Overlaps(STRING, BINARY)
+select ST_Overlaps("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Overlaps("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Overlaps("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4,
4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,false
+====
+---- QUERY
+# ST_Overlaps(STRING, STRING)
+select ST_Overlaps("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Overlaps("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Overlaps("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,false
+====
+---- QUERY
+# ST_Touches(BINARY, STRING)
+select ST_Touches(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Touches(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Touches(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Touches(STRING, BINARY)
+select ST_Touches("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Touches("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Touches("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4,
4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Touches(STRING, STRING)
+select ST_Touches("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Touches("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Touches("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Contains(BINARY, STRING)
+select ST_Contains(st_polygon(2,2, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Contains(st_polygon(0,0, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Contains(st_polygon(0,0, 10,0, 0, 10), "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Contains(STRING, BINARY)
+select ST_Contains("POLYGON ((2 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Contains("POLYGON ((0 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4,
4,1)),
+ ST_Contains("POLYGON ((0 0, 10 0, 0 10))", ST_Polygon(1,1, 1,4, 4,4,
4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Contains(STRING, STRING)
+select ST_Contains("POLYGON ((2 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Contains("POLYGON ((0 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Contains("POLYGON ((0 0, 10 0, 0 10))", "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Within(BINARY, STRING)
+select ST_Within(st_polygon(2,2, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Within(st_polygon(0,0, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Within(st_polygon(0,0, 10,0, 0, 10), "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,false
+====
+---- QUERY
+# ST_Within(STRING, BINARY)
+select ST_Within("POLYGON ((2 2, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
+ ST_Within("POLYGON ((0 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
+ ST_Within("POLYGON ((0 0, 10 0, 0 10))", ST_Polygon(1,1, 1,4, 4,4,
4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,false
+====
+---- QUERY
+# ST_Within(STRING, STRING)
+select ST_Within("POLYGON ((2 2, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Within("POLYGON ((0 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4
1))"),
+ ST_Within("POLYGON ((0 0, 10 0, 0 10))", "POLYGON ((1 1, 1 4, 4 4, 4
1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+true,false,false
+====
+---- QUERY
+# ST_Crosses(BINARY, STRING)
+select ST_Crosses(st_linestring(2,2, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Crosses(st_linestring(0,0, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Crosses(st_linestring(0,0, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Crosses(STRING, BINARY)
+select ST_Crosses("LINESTRING (2 2, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
+ ST_Crosses("LINESTRING (0 0, 1 1)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
+ ST_Crosses("LINESTRING (0 0, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Crosses(STRING, STRING)
+select ST_Crosses("LINESTRING (2 2, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Crosses("LINESTRING (0 0, 1 1)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Crosses("LINESTRING (0 0, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,false,true
+====
+---- QUERY
+# ST_Disjoint(BINARY, STRING)
+select ST_Disjoint(st_linestring(2,2, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Disjoint(st_linestring(1,0, 0,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Disjoint(st_linestring(0,0, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,true,false
+====
+---- QUERY
+# ST_Disjoint(STRING, BINARY)
+select ST_Disjoint("LINESTRING (2 2, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
+ ST_Disjoint("LINESTRING (1 0, 0 1)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
+ ST_Disjoint("LINESTRING (0 0, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,true,false
+====
+---- QUERY
+# ST_Disjoint(STRING, STRING)
+select ST_Disjoint("LINESTRING (2 2, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Disjoint("LINESTRING (1 0, 0 1)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
+ ST_Disjoint("LINESTRING (0 0, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,true,false
+====
+---- QUERY
+# ST_Equals(BINARY, STRING)
+select ST_Equals(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 1))"),
+ ST_Equals(st_polygon(1,1, 1,4, 4,1), "POLYGON ((1 1, 1 4, 4 1))"),
+ ST_Equals(st_polygon(0,0, 0,1, 1,0), "POLYGON ((1 1, 1 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,true,false
+====
+---- QUERY
+# ST_Equals(STRING, BINARY)
+select ST_Equals("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,1)),
+ ST_Equals("POLYGON ((1 1, 1 4, 4 1))", ST_Polygon(1,1, 1,4, 4,1)),
+ ST_Equals("POLYGON ((0 0, 0 1, 1 0))", ST_Polygon(1,1, 1,4, 4,1));
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,true,false
+====
+---- QUERY
+# ST_Equals(STRING, STRING)
+select ST_Equals("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 1))"),
+ ST_Equals("POLYGON ((1 1, 1 4, 4 1))", "POLYGON ((1 1, 1 4, 4 1))"),
+ ST_Equals("POLYGON ((0 0, 0 1, 1 0))", "POLYGON ((1 1, 1 4, 4 1))");
+---- TYPES
+BOOLEAN,BOOLEAN,BOOLEAN
+---- RESULTS
+false,true,false
+====
\ No newline at end of file
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri.test
b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri.test
index 6a2abb4f3..158309004 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri.test
@@ -2717,6 +2717,7 @@ select
ST_AsText(ST_SetSRID(ST_GeomFromText('MultiLineString((0 80, 0.03 80.04))
'MULTILINESTRING ((0 80, 0.03 80.04))'
====
---- QUERY
+# TODO: move these new tests to geospatial-esri-extra?
# NOTE: Due to HIVE-29323 ESRI returns MULTIPOLYGON EMPTY for single point
# PostGIS would return: POINT (1 2)
select ST_AsText(ST_ConvexHull(ST_Point(1, 2)));
diff --git a/tests/custom_cluster/test_geospatial_library.py
b/tests/custom_cluster/test_geospatial_library.py
index d1a8474d8..b1e7f8820 100644
--- a/tests/custom_cluster/test_geospatial_library.py
+++ b/tests/custom_cluster/test_geospatial_library.py
@@ -22,6 +22,7 @@ from tests.common.custom_cluster_test_suite import
CustomClusterTestSuite
from tests.common.skip import SkipIfApacheHive
ST_POINT_SIGNATURE = "BINARY\tst_point(STRING)\tJAVA\ttrue"
+ST_X_SIGNATURE_BUILTIN = "DOUBLE\tst_x(BINARY)\tBUILTIN\ttrue"
SHOW_FUNCTIONS = "show functions in _impala_builtins"
@@ -34,9 +35,11 @@ class TestGeospatialLibrary(CustomClusterTestSuite):
def test_disabled(self):
result = self.execute_query(SHOW_FUNCTIONS)
assert ST_POINT_SIGNATURE not in result.data
+ assert ST_X_SIGNATURE_BUILTIN not in result.data
@SkipIfApacheHive.feature_not_supported
@pytest.mark.execute_serially
def test_enabled(self):
+ """Check that SHOW FUNCTIONS lists both the Java st_point and the builtin st_x."""
result = self.execute_query(SHOW_FUNCTIONS)
assert ST_POINT_SIGNATURE in result.data
+ assert ST_X_SIGNATURE_BUILTIN in result.data
diff --git a/tests/query_test/test_geospatial_functions.py
b/tests/query_test/test_geospatial_functions.py
index acaad7147..810dded74 100644
--- a/tests/query_test/test_geospatial_functions.py
+++ b/tests/query_test/test_geospatial_functions.py
@@ -18,14 +18,29 @@
from __future__ import absolute_import, division, print_function
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfApacheHive
+from tests.common.test_dimensions import create_single_exec_option_dimension
class TestGeospatialFuctions(ImpalaTestSuite):
+
+ @classmethod
+ def add_test_dimensions(cls):
+ """Add a single exec option dimension and limit the test matrix to Parquet."""
+ super(TestGeospatialFuctions, cls).add_test_dimensions()
+ cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
+ # Tests do not use tables at the moment, skip other fileformats than
Parquet.
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('table_format').file_format == 'parquet')
+
"""Tests the geospatial builtin functions"""
@SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_functions(self, vector):
+ """Run the generated ESRI test file and the manually written extra test file."""
+ # tests generated from
+ #
https://github.com/Esri/spatial-framework-for-hadoop/tree/master/hive/test
self.run_test_case('QueryTest/geospatial-esri', vector)
+ # manual tests added
+ self.run_test_case('QueryTest/geospatial-esri-extra', vector)
+ @SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_planner(self, vector):
# These tests are not among planner tests because with default flags
# geospatial builtin functions are not loaded.