This is an automated email from the ASF dual-hosted git repository.
indhumuthumurugesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 70643df [CARBONDATA-4196] Allow zero or more white space in GEO UDFs
70643df is described below
commit 70643dfc9e8a390a6928e2c1156128053961543d
Author: Nihal ojha <[email protected]>
AuthorDate: Fri Jun 4 10:39:31 2021 +0530
[CARBONDATA-4196] Allow zero or more white space in GEO UDFs
Why is this PR needed?
Currently, the regexes for the geo UDFs do not allow zero spaces between
the UDF name and the opening parenthesis; they always expect exactly one
space in between, e.g. linestring (120.184179 30.327465). Because of
this, calling the UDFs without that space sometimes does not give
the expected result.
What changes were proposed in this PR?
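Allow zero spaces between the UDF name and the opening parenthesis (POLYGON and RANGELIST now accept zero or one space; LINESTRING accepts zero or more).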
Does this PR introduce any user interface change?
No
Is any new testcase added?
Yes
This closes #4145
---
.../main/java/org/apache/carbondata/geo/GeoConstants.java | 6 +++---
.../test/scala/org/apache/carbondata/geo/GeoQueryTest.scala | 2 +-
.../src/test/scala/org/apache/carbondata/geo/GeoTest.scala | 12 +++++++-----
3 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/geo/src/main/java/org/apache/carbondata/geo/GeoConstants.java
b/geo/src/main/java/org/apache/carbondata/geo/GeoConstants.java
index 67baa82..20d7acc 100644
--- a/geo/src/main/java/org/apache/carbondata/geo/GeoConstants.java
+++ b/geo/src/main/java/org/apache/carbondata/geo/GeoConstants.java
@@ -31,13 +31,13 @@ public class GeoConstants {
public static final String POSITIVE_INTEGER_REGEX = "^[+]?\\d*[1-9]\\d*$";
// Regular expression to parse input polygons for IN_POLYGON_LIST
- public static final String POLYGON_REG_EXPRESSION = "(?<=POLYGON
\\(\\()(.*?)(?=(\\)\\)))";
+ public static final String POLYGON_REG_EXPRESSION = "(?<=POLYGON[
]{0,1}\\(\\()(.*?)(?=(\\)\\)))";
// Regular expression to parse input polylines for IN_POLYLINE_LIST
- public static final String POLYLINE_REG_EXPRESSION = "LINESTRING \\(.*?\\)";
+ public static final String POLYLINE_REG_EXPRESSION = "LINESTRING *\\(.*?\\)";
// Regular expression to parse input rangelists for IN_POLYGON_RANGE_LIST
- public static final String RANGELIST_REG_EXPRESSION = "(?<=RANGELIST
\\()(.*?)(?=\\))";
+ public static final String RANGELIST_REG_EXPRESSION = "(?<=RANGELIST[
]{0,1}\\()(.*?)(?=\\))";
public static final String GRID_SIZE = "gridSize";
diff --git
a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoQueryTest.scala
b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoQueryTest.scala
index 57394aa..d56ed73 100644
---
a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoQueryTest.scala
+++
b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoQueryTest.scala
@@ -41,7 +41,7 @@ class GeoQueryTest extends QueryTest with BeforeAndAfterAll
with BeforeAndAfterE
sql(s"insert into $polygonTable select 'POLYGON ((120.176433
30.327431,120.171283 30.322245," +
s"120.181411 30.314540, 120.190509 30.321653,120.185188
30.329358,120.176433 30.327431))" +
s"','abc','1'")
- sql(s"insert into $polygonTable select 'polygon ((120.191603
30.328946,120.184179 30.327465," +
+ sql(s"insert into $polygonTable select 'polygon((120.191603
30.328946,120.184179 30.327465," +
s"120.181819 30.321464,120.190359 30.315388,120.199242
30.324464,120.191603 30.328946))'," +
s"'abc','1'")
sql(s"insert into $polygonTable select null,'abc','1'")
diff --git
a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
index 628d690..b911c26 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
@@ -631,14 +631,16 @@ class GeoTest extends QueryTest with BeforeAndAfterAll
with BeforeAndAfterEach {
test("test one polyline query") {
createTable()
loadData2()
- checkAnswer(
- sql(s"select longitude, latitude from $table1 where IN_POLYLINE_LIST(" +
+ val df = sql(s"select longitude, latitude from $table1 where
IN_POLYLINE_LIST(" +
s"'LINESTRING (120.184179 30.327465, 120.191603 30.328946, 120.199242
30.324464, " +
- s"120.190359 30.315388)', 65)"),
- Seq(Row(120184976, 30327105),
+ s"120.190359 30.315388)', 65)")
+ checkAnswer(df, Seq(Row(120184976, 30327105),
Row(120197093, 30325985),
Row(120196020, 30321651),
Row(120198638, 30323540)))
+ checkAnswer(sql(s"select longitude, latitude from $table1 where
IN_POLYLINE_LIST(" +
+ s"'LINESTRING(120.184179 30.327465, 120.191603 30.328946, 120.199242
30.324464, " +
+ s"120.190359 30.315388)', 65)"), df)
}
test("test polyline list query, result is union of two polylines") {
@@ -669,7 +671,7 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with
BeforeAndAfterEach {
loadData()
checkAnswer(
sql(s"select mygeohash, longitude, latitude from $table1 where
IN_POLYGON_RANGE_LIST(" +
- s"'RANGELIST (855279368848 855279368850, 855280799610 855280799612)',
'OR')"),
+ s"'RANGELIST(855279368848 855279368850, 855280799610 855280799612)',
'OR')"),
Seq(Row(855279368850L, 116288955, 39999101),
Row(855280799612L, 116285807, 40084087)))
}