This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 57a1f3f Support JSON queries with top-level array path expression.
(#7511)
57a1f3f is described below
commit 57a1f3fe9674c57f29c470e3613d68a294fd7604
Author: Amrish Lal <[email protected]>
AuthorDate: Thu Oct 7 11:55:02 2021 -0700
Support JSON queries with top-level array path expression. (#7511)
* Support JSON queries with top-level array path expression.
* Cleanup.
* Properly detect top-level path expressions.
* add test case for top-level path expressions in JSON_MATCH
* optimize code.
* Map identifier 'column[0]' to json path '0' for compatiblity with
JSON_MATCH.
* Rebuild.
* Rebuild.
* codereview changes.
* Rebuild.
* Update
pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
Co-authored-by: Xiaotian (Jackie) Jiang
<[email protected]>
Co-authored-by: Xiaotian (Jackie) Jiang
<[email protected]>
---
.../statement/JsonStatementOptimizer.java | 36 +++++++++---
.../statement/JsonStatementOptimizerTest.java | 39 +++++++++++++
.../apache/pinot/queries/JsonPathQueriesTest.java | 68 ++++++++++++----------
3 files changed, 102 insertions(+), 41 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
index 258c796..d2f2198 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
@@ -24,7 +24,6 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
-import org.apache.commons.lang.StringUtils;
import org.apache.pinot.common.function.scalar.ArithmeticFunctions;
import org.apache.pinot.common.function.scalar.DateTimeFunctions;
import org.apache.pinot.common.request.Expression;
@@ -384,13 +383,34 @@ public class JsonStatementOptimizer implements
StatementOptimizer {
}
/**
- * @return A string array containing all the parts of an identifier. An
identifier may have one or more parts that
- * are joined together using <DOT>. For example the identifier
"testTable.jsonColumn.name.first" consists up of
- * "testTable" (name of table), "jsonColumn" (name of column), "name" (json
path), and "first" (json path). The last
- * two parts when joined together (name.first) represent a JSON path
expression.
+ * @return A two element String array where the first element is the column
name and second element is the JSON
+ * path expression. If column name is not suffixed by JSON path expression,
then array will contain only a single
+ * element representing the column name. For example:
+ * 1) Identifier "jsonColumn.name.first" -> {"jsonColumn", ".name.first"}
+ * 2) Identifier "jsonColumn[0]" -> {"jsonColumn", "[0]"}
+ * 3) Identifier "jsonColumn" -> {"jsonColumn"}
*/
private static String[] getIdentifierParts(Identifier identifier) {
- return StringUtils.split(identifier.getName(), '.');
+ String name = identifier.getName();
+ int dotIndex = name.indexOf('.');
+ int openBracketIndex = name.indexOf('[');
+
+ // column name followed by top-level array expression.
+ if (openBracketIndex != -1) {
+ // name has an '[', check if this path expression refers to a top-level
JSON array.
+ if (dotIndex == -1 || openBracketIndex < dotIndex) {
+ // This path expression refers to a top-level JSON array.
+ return new String[]{name.substring(0, openBracketIndex),
name.substring(openBracketIndex)};
+ }
+ }
+
+ // column name followed by all other JSON path expression
+ if (dotIndex != -1) {
+ return new String[] {name.substring(0, dotIndex),
name.substring(dotIndex)};
+ }
+
+ // column name without any JSON path expression
+ return new String[] {name};
}
/**
@@ -407,9 +427,7 @@ public class JsonStatementOptimizer implements
StatementOptimizer {
}
builder.append("$");
- for (int i = 1; i < parts.length; i++) {
- builder.append(".").append(parts[i]);
- }
+ builder.append(parts[1]);
if (applyDoubleQuote) {
builder.append("\"");
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
index 2339ad7..32280a4 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
@@ -200,4 +200,43 @@ public class JsonStatementOptimizerTest {
"SELECT MAX(JSON_EXTRACT_SCALAR(jsonColumn, '$.id', 'DOUBLE', '" +
Double.NEGATIVE_INFINITY
+ "') - 5) AS \"max(minus(jsonColumn.id,'5'))\" FROM testTable",
TABLE_CONFIG_WITH_INDEX, SCHEMA);
}
+
+ @Test
+ public void testTopLevelArrayPathExpressions() {
+ // SELECT using json path expression with top-level array addressing.
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[0] FROM testTable",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null') AS
\"jsonColumn[0]\" FROM testTable",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[0].a FROM testTable",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0].a', 'STRING', 'null') AS
\"jsonColumn[0].a\" FROM testTable",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ TestHelper.assertEqualsQuery("SELECT jsonColumn.a[0] FROM testTable",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$.a[0]', 'STRING', 'null') AS
\"jsonColumn.a[0]\" FROM testTable",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[1].i2 FROM testTable WHERE
jsonColumn[1].i2 IS NOT NULL",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[1].i2', 'STRING', 'null')
AS \"jsonColumn[1].i2\" FROM testTable "
+ + "WHERE JSON_MATCH(jsonColumn, '\"$[1].i2\" IS NOT NULL')",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ // Use top-level array addressing in json path expression in
JSON_EXTRACT_SCALAR filter.
+ TestHelper.assertEqualsQuery("SELECT * FROM testTable WHERE jsonColumn[2]
IS NOT NULL and jsonColumn[2] = 'test'",
+ "SELECT * FROM testTable WHERE JSON_EXTRACT_SCALAR(jsonColumn, '$[2]',
'JSON', 'null') IS NOT NULL AND "
+ + "JSON_EXTRACT_SCALAR(jsonColumn, '$[2]', 'STRING', 'null') =
'test'",
+ TABLE_CONFIG_WITHOUT_INDEX, SCHEMA);
+
+ // Use top-level array addressing in json path expression in JSON_MATCH
filter
+ TestHelper.assertEqualsQuery("SELECT * FROM testTable WHERE jsonColumn[2]
IS NOT NULL and jsonColumn[2] = 'test'",
+ "SELECT * FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$[2]\" IS NOT
NULL') AND JSON_MATCH(jsonColumn, "
+ + "'\"$[2]\" = ''test''')",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ // Use top-level array addressing in json path expression in GROUP BY
clause.
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[0], count(*) FROM
testTable GROUP BY jsonColumn[0]",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null') AS
\"jsonColumn[0]\", count(*) FROM "
+ + "testTable GROUP BY JSON_EXTRACT_SCALAR(jsonColumn, '$[0]',
'STRING', 'null')",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+ }
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
index 8eb4c8d..097e6d9 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
@@ -141,6 +141,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
+ "[[\"a\",\"b\"],[\"c\",\"d\"]]]}"));
records.add(createRecord(13, 13, "days",
"{\"name\": {\"first\": \"multi-dimensional-1\",\"last\":
\"array\"},\"days\": 111}"));
+ records.add(createRecord(14, 14, "top level array", "[{\"i1\":1,\"i2\":2},
{\"i1\":3,\"i2\":4}]"));
List<String> jsonIndexColumns = new ArrayList<>();
jsonIndexColumns.add("jsonColumn");
@@ -184,12 +185,10 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
// SELECT using a simple json path expression.
Object[][] expecteds1 = {{"duck"}, {"mouse"}, {"duck"}};
checkresult("SELECT jsonColumn.name.last FROM testTable LIMIT 3",
expecteds1);
- //checkresult("SELECT jsonColumnWithoutIndex.name.last FROM testTable
LIMIT 3", expecteds1);
Object[][] expecteds2 =
{{"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"},
{"null"}, {"null"}, {"1"}};
checkresult("SELECT jsonColumn.data[0].e[2].z[0].i1 FROM testTable",
expecteds2);
- //checkresult("SELECT jsonColumnWithoutIndex.data[0].e[2].z[0].i1 FROM
testTable", expecteds2);
}
/** Test that a predicate comparing a json path expression with literal is
properly converted into a JSON_MATCH
@@ -197,24 +196,18 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
@Test
public void testJsonFilter() {
// Comparing json path expression with a string value.
- Object[][] expecteds1 = {
- {
- 1,
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
+ Object[][] expecteds1 =
+ {{1,
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
1L,
- "daffy duck"
- }
- };
+ "daffy duck"}};
checkresult("SELECT * FROM testTable WHERE jsonColumn.name.first = 'daffy'
LIMIT 1", expecteds1);
checkresult("SELECT * FROM testTable WHERE
jsonColumnWithoutIndex.name.first = 'daffy' LIMIT 1", expecteds1);
// Comparing json path expression with a numerical value.
- Object[][] expecteds2 = {
- {
- 1,
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
+ Object[][] expecteds2 =
+ {{1,
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
1L,
- "daffy duck"
- }
- };
+ "daffy duck"}};
checkresult("SELECT * FROM testTable WHERE JSON_MATCH(jsonColumn,
'\"$.id\" = 101') LIMIT 1", expecteds2);
try {
checkresult("SELECT * FROM testTable WHERE
JSON_MATCH(jsonColumnWithoutIndex, '\"$.id\" = 101') LIMIT 1",
@@ -227,19 +220,16 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
// Comparing json path expression with a string value.
Object[][] expecteds3 = {{4L}};
- checkresult(
- "SELECT count(*) FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\"
IS NOT NULL') AND JSON_MATCH"
- + "(jsonColumn, '\"$.id\" = 101')",
- expecteds3);
+ checkresult("SELECT count(*) FROM testTable WHERE JSON_MATCH(jsonColumn,
'\"$.id\" IS NOT NULL') AND JSON_MATCH"
+ + "(jsonColumn, '\"$.id\" = 101')", expecteds3);
}
/** Test that a json path expression in GROUP BY clause is properly
converted into a JSON_EXTRACT_SCALAR function. */
@Test
public void testJsonGroupBy() {
- Object[][] expecteds1 = {
- {"111", 20L}, {"101", 4L}, {"null", 4L}, {"181", 4L}, {"161.5", 4L},
{"171", 4L}, {"161", 4L}, {"141", 4L},
- {"131", 4L}, {"121", 4L}
- };
+ Object[][] expecteds1 =
+ {{"111", 20L}, {"101", 4L}, {"null", 8L}, {"181", 4L}, {"161.5", 4L},
{"171", 4L}, {"161", 4L}, {"141", 4L},
+ {"131", 4L}, {"121", 4L}};
checkresult("SELECT jsonColumn.id, count(*) FROM testTable GROUP BY
jsonColumn.id", expecteds1);
checkresult("SELECT jsonColumnWithoutIndex.id, count(*) FROM testTable
GROUP BY jsonColumnWithoutIndex.id",
expecteds1);
@@ -251,12 +241,10 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
Object[][] expecteds1 = {{"mouse", 8L}};
checkresult(
"SELECT jsonColumn.name.last, count(*) FROM testTable GROUP BY
jsonColumn.name.last HAVING jsonColumn.name"
- + ".last = 'mouse'",
- expecteds1);
+ + ".last = 'mouse'", expecteds1);
checkresult(
"SELECT jsonColumnWithoutIndex.name.last, count(*) FROM testTable
GROUP BY jsonColumnWithoutIndex.name.last "
- + "HAVING jsonColumnWithoutIndex.name.last = 'mouse'",
- expecteds1);
+ + "HAVING jsonColumnWithoutIndex.name.last = 'mouse'", expecteds1);
}
/** Test a complex SQL statement with json path expression in SELECT, WHERE,
and GROUP BY clauses. */
@@ -268,8 +256,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
expecteds1);
checkresult(
"SELECT jsonColumnWithoutIndex.name.last, count(*) FROM testTable
WHERE jsonColumnWithoutIndex.id = 101 GROUP"
- + " BY jsonColumnWithoutIndex.name.last",
- expecteds1);
+ + " BY jsonColumnWithoutIndex.name.last", expecteds1);
}
/** Test an aggregation function over json path expression in SELECT clause.
*/
@@ -287,8 +274,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
try {
checkresult(
"SELECT FROMEPOCHDAYS(jsonColumnWithoutIndex.days) FROM testTable
WHERE jsonColumnWithoutIndex.days IS NULL"
- + " LIMIT 1",
- expecteds2);
+ + " LIMIT 1", expecteds2);
Assert.fail();
} catch (BadQueryRequestException e) {
Assert
@@ -302,8 +288,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
try {
checkresult(
"SELECT FROMEPOCHDAYS(jsonColumnWithoutIndex.days) FROM testTable
WHERE jsonColumnWithoutIndex.days IS NOT "
- + "NULL LIMIT 1",
- expecteds3);
+ + "NULL LIMIT 1", expecteds3);
Assert.fail();
} catch (BadQueryRequestException e) {
Assert.assertEquals(e.getMessage(),
@@ -330,4 +315,23 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
checkresult("SELECT MAX(jsonColumn.id - 5) FROM testTable", expecteds3);
checkresult("SELECT MAX(jsonColumnWithoutIndex.id - 5) FROM testTable",
expecteds3);
}
+
+ @Test
+ public void testTopLevelArrayPathExpressions() {
+ // SELECT using json path expressions that refers to second element of a
top-level array.
+ Object[][] expecteds1 = {{"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,\"i2\":4}"},
{"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,"
+ + "\"i2\":4}"}};
+ checkresult("SELECT jsonColumn[1] FROM testTable WHERE intColumn=14",
expecteds1);
+
+ // SELECT using json path expressions that refers to item within second
element of a top-level array.
+ Object[][] expecteds2 = {{"4"}, {"4"}, {"4"}, {"4"}};
+ checkresult("SELECT jsonColumn[1].i2 FROM testTable WHERE intColumn=14",
expecteds2);
+
+ // SELECT using json path expression and check path expression for IS NULL.
+ checkresult("SELECT jsonColumn[1].i2 FROM testTable WHERE jsonColumn[1].i2
IS NOT NULL", expecteds2);
+
+ // GROUP BY using a json path expression that refers to a top-level array
element.
+ Object[][] expecteds3 = {{"{\"i1\":3,\"i2\":4}", 4L}, {"null", 56L}};
+ checkresult("SELECT jsonColumn[1], count(*) FROM testTable GROUP BY
jsonColumn[1]", expecteds3);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]