This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 307b8e33571 feat: json_merge expression and sql function (#17081)
307b8e33571 is described below
commit 307b8e33571ceabe767f37d12514895def6bedd0
Author: Lasse Mammen <[email protected]>
AuthorDate: Tue Sep 17 13:57:34 2024 +0100
feat: json_merge expression and sql function (#17081)
---
docs/querying/math-expr.md | 1 +
docs/querying/sql-functions.md | 7 ++
docs/querying/sql-json-functions.md | 1 +
.../org/apache/druid/guice/ExpressionModule.java | 1 +
.../query/expression/NestedDataExpressions.java | 112 +++++++++++++++++++++
.../expression/NestedDataExpressionsTest.java | 58 +++++++++++
.../builtin/NestedDataOperatorConversions.java | 46 +++++++++
.../sql/calcite/planner/DruidOperatorTable.java | 1 +
.../sql/calcite/CalciteNestedDataQueryTest.java | 50 +++++++++
website/.spelling | 1 +
10 files changed, 278 insertions(+)
diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md
index 38ced649c06..0893fc4e236 100644
--- a/docs/querying/math-expr.md
+++ b/docs/querying/math-expr.md
@@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform,
and create `COMPLEX<jso
| to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
| json_keys(expr, path) | Get array of field names from `expr` at the
specified JSONPath `path`, or null if the data does not exist or have any
fields |
| json_paths(expr) | Get array of all JSONPath paths available from `expr` |
+| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or
`COMPLEX<json>` into one. Preserves the rightmost value when there are key
overlaps. |
### JSONPath syntax
diff --git a/docs/querying/sql-functions.md b/docs/querying/sql-functions.md
index 5cdcbb25495..6859ca67391 100644
--- a/docs/querying/sql-functions.md
+++ b/docs/querying/sql-functions.md
@@ -855,6 +855,13 @@ Returns true if the IPv6 `address` belongs to the `subnet`
literal, else false.
Returns an array of field names from `expr` at the specified `path`.
+## JSON_MERGE
+
+**Function type:** [JSON](sql-json-functions.md)
+
+`JSON_MERGE(expr1, expr2[, expr3 ...])`
+Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Preserves the
rightmost value when there are key overlaps. Returning always a `COMPLEX<json>`
type.
+
## JSON_OBJECT
**Function type:** [JSON](sql-json-functions.md)
diff --git a/docs/querying/sql-json-functions.md
b/docs/querying/sql-json-functions.md
index 199c568c29b..35b4f5e3769 100644
--- a/docs/querying/sql-json-functions.md
+++ b/docs/querying/sql-json-functions.md
@@ -38,6 +38,7 @@ You can use the following JSON functions to extract,
transform, and create `COMP
| --- | --- |
|`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the
specified `path`.|
|`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` |
Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to
string types. The `VALUE` expressions can be composed of any input type,
including other `COMPLEX<json>` values. `JSON_OBJECT` can accept
colon-separated key-value pairs. The following syntax is equivalent:
`JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
+|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or
`COMPLEX<json>` into one. Preserves the rightmost value when there are key
overlaps. Returning always a `COMPLEX<json>` type.|
|`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal
values in `expr` in JSONPath format. |
|`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at
the specified `path`. |
|`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from
`expr` at the specified `path`. If value is not an `ARRAY`, it gets translated
into a single element `ARRAY` containing the value at `path`. The primary use
of this function is to extract arrays of objects to use as inputs to other
[array functions](./sql-array-functions.md).|
diff --git
a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
index 917cf967f14..e1064234e56 100644
--- a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
+++ b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
@@ -81,6 +81,7 @@ public class ExpressionModule implements Module
.add(HyperUniqueExpressions.HllEstimateExprMacro.class)
.add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class)
.add(NestedDataExpressions.JsonObjectExprMacro.class)
+ .add(NestedDataExpressions.JsonMergeExprMacro.class)
.add(NestedDataExpressions.JsonKeysExprMacro.class)
.add(NestedDataExpressions.JsonPathsExprMacro.class)
.add(NestedDataExpressions.JsonValueExprMacro.class)
diff --git
a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
index 873b4f83188..0926ce78e0a 100644
---
a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
+++
b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
@@ -22,6 +22,7 @@ package org.apache.druid.query.expression;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
@@ -99,6 +100,117 @@ public class NestedDataExpressions
}
}
+ public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro
+ {
+ public static final String NAME = "json_merge";
+
+ private final ObjectMapper jsonMapper;
+
+ @Inject
+ public JsonMergeExprMacro(
+ @Json ObjectMapper jsonMapper
+ )
+ {
+ this.jsonMapper = jsonMapper;
+ }
+
+ @Override
+ public String name()
+ {
+ return NAME;
+ }
+
+ @Override
+ public Expr apply(List<Expr> args)
+ {
+ if (args.size() < 2) {
+ throw validationFailed("must have at least two arguments");
+ }
+
+ final class ParseJsonExpr extends
ExprMacroTable.BaseScalarMacroFunctionExpr
+ {
+ public ParseJsonExpr(List<Expr> args)
+ {
+ super(JsonMergeExprMacro.this, args);
+ }
+
+ @Override
+ public ExprEval eval(ObjectBinding bindings)
+ {
+ ExprEval arg = args.get(0).eval(bindings);
+ Object obj;
+
+ if (arg.value() == null) {
+ throw JsonMergeExprMacro.this.validationFailed(
+ "invalid input expected %s but got %s instead",
+ ExpressionType.STRING,
+ arg.type()
+ );
+ }
+
+ try {
+ obj = jsonMapper.readValue(getArgAsJson(arg), Object.class);
+ }
+ catch (JsonProcessingException e) {
+ throw JsonMergeExprMacro.this.processingFailed(e, "bad string
input [%s]", arg.asString());
+ }
+
+ ObjectReader updater = jsonMapper.readerForUpdating(obj);
+
+ for (int i = 1; i < args.size(); i++) {
+ ExprEval argSub = args.get(i).eval(bindings);
+
+ try {
+ String str = getArgAsJson(argSub);
+ if (str != null) {
+ obj = updater.readValue(str);
+ }
+ }
+ catch (JsonProcessingException e) {
+ throw JsonMergeExprMacro.this.processingFailed(e, "bad string
input [%s]", argSub.asString());
+ }
+ }
+
+ return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj);
+ }
+
+ @Nullable
+ @Override
+ public ExpressionType getOutputType(InputBindingInspector inspector)
+ {
+ return ExpressionType.NESTED_DATA;
+ }
+
+ private String getArgAsJson(ExprEval arg)
+ {
+ if (arg.value() == null) {
+ return null;
+ }
+
+ if (arg.type().is(ExprType.STRING)) {
+ return arg.asString();
+ }
+
+ if (arg.type().is(ExprType.COMPLEX)) {
+ try {
+ return jsonMapper.writeValueAsString(unwrap(arg));
+ }
+ catch (JsonProcessingException e) {
+ throw JsonMergeExprMacro.this.processingFailed(e, "bad complex
input [%s]", arg.asString());
+ }
+ }
+
+ throw JsonMergeExprMacro.this.validationFailed(
+ "invalid input expected %s but got %s instead",
+ ExpressionType.STRING,
+ arg.type()
+ );
+ }
+ }
+ return new ParseJsonExpr(args);
+ }
+ }
+
public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "to_json_string";
diff --git
a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
index b14edb2d17b..c9fe553469a 100644
---
a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
+++
b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
@@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends
InitializedNullHandlingTest
new NestedDataExpressions.JsonPathsExprMacro(),
new NestedDataExpressions.JsonKeysExprMacro(),
new NestedDataExpressions.JsonObjectExprMacro(),
+ new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER),
new NestedDataExpressions.JsonValueExprMacro(),
new NestedDataExpressions.JsonQueryExprMacro(),
new NestedDataExpressions.JsonQueryArrayExprMacro(),
@@ -112,6 +113,63 @@ public class NestedDataExpressionsTest extends
InitializedNullHandlingTest
Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map)
eval.value()).get("y"));
}
+ @Test
+ public void testJsonMergeExpression() throws JsonProcessingException
+ {
+ Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')",
MACRO_TABLE);
+ ExprEval eval = expr.eval(inputBindings);
+ Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+ expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE);
+ eval = expr.eval(inputBindings);
+ Assert.assertEquals("{\"a\":\"x\"}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+ expr =
Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')",
MACRO_TABLE);
+ eval = expr.eval(inputBindings);
+ Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+ expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b',
'y'))", MACRO_TABLE);
+ eval = expr.eval(inputBindings);
+ Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+ expr =
Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))",
MACRO_TABLE);
+ eval = expr.eval(inputBindings);
+ Assert.assertEquals("{\"a\":\"y\"}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+ expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')",
MACRO_TABLE);
+ eval = expr.eval(inputBindings);
+ Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+ }
+
+ @Test
+ public void testJsonMergeOverflow() throws JsonProcessingException
+ {
+ Expr.ObjectBinding input1 = InputBindings.forInputSuppliers(
+ new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
+ .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA,
() -> ImmutableMap.of("key", "blah", "value", "blahblah")))
+ .build()
+ );
+ Expr.ObjectBinding input2 = InputBindings.forInputSuppliers(
+ new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
+ .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA,
() -> ImmutableMap.of("key", "blah2", "value", "blahblah2")))
+ .build()
+ );
+
+ Expr expr = Parser.parse("json_merge(json_object(),
json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))",
MACRO_TABLE);
+ ExprEval eval = expr.eval(input1);
+ Assert.assertEquals("{\"blah\":\"blahblah\"}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+ eval = expr.eval(input2);
+ Assert.assertEquals("{\"blah2\":\"blahblah2\"}",
JSON_MAPPER.writeValueAsString(eval.value()));
+ Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+ }
+
@Test
public void testJsonKeysExpression()
{
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
index a6006046553..9c6cfb0448b 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
@@ -780,6 +780,52 @@ public class NestedDataOperatorConversions
}
}
+ public static class JsonMergeOperatorConversion implements
SqlOperatorConversion
+ {
+ private static final String FUNCTION_NAME = "json_merge";
+ private static final SqlFunction SQL_FUNCTION = OperatorConversions
+ .operatorBuilder(FUNCTION_NAME)
+
.operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1)))
+ .operandTypeInference((callBinding, returnType, operandTypes) -> {
+ RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
+ for (int i = 0; i < operandTypes.length; i++) {
+ operandTypes[i] = typeFactory.createTypeWithNullability(
+ typeFactory.createSqlType(SqlTypeName.ANY),
+ true
+ );
+ }
+ })
+ .returnTypeInference(NESTED_RETURN_TYPE_INFERENCE)
+ .functionCategory(SqlFunctionCategory.SYSTEM)
+ .build();
+
+ @Override
+ public SqlOperator calciteOperator()
+ {
+ return SQL_FUNCTION;
+ }
+
+ @Nullable
+ @Override
+ public DruidExpression toDruidExpression(
+ PlannerContext plannerContext,
+ RowSignature rowSignature,
+ RexNode rexNode
+ )
+ {
+ return OperatorConversions.convertCall(
+ plannerContext,
+ rowSignature,
+ rexNode,
+ druidExpressions -> DruidExpression.ofExpression(
+ ColumnType.NESTED_DATA,
+ DruidExpression.functionCall("json_merge"),
+ druidExpressions
+ )
+ );
+ }
+ }
+
public static class ToJsonStringOperatorConversion implements
SqlOperatorConversion
{
private static final String FUNCTION_NAME = "to_json_string";
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
index 27efe16270e..0fb1c9fb9ff 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
@@ -353,6 +353,7 @@ public class DruidOperatorTable implements SqlOperatorTable
.add(new
NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion())
.add(new
NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion())
.add(new
NestedDataOperatorConversions.JsonObjectOperatorConversion())
+ .add(new
NestedDataOperatorConversions.JsonMergeOperatorConversion())
.add(new
NestedDataOperatorConversions.ToJsonStringOperatorConversion())
.add(new
NestedDataOperatorConversions.ParseJsonOperatorConversion())
.add(new
NestedDataOperatorConversions.TryParseJsonOperatorConversion())
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 88ad05a4a27..b03b6698b1d 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -69,6 +69,7 @@ import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
+import org.apache.druid.segment.nested.NestedPathField;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import
org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
@@ -4920,6 +4921,55 @@ public class CalciteNestedDataQueryTest extends
BaseCalciteQueryTest
);
}
+ @Test
+ public void testJsonMerging()
+ {
+ testQuery(
+ "SELECT "
+ + "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE
JSON_VALUE(nest, '$.x')))\n"
+ + "FROM druid.nested",
+ ImmutableList.of(
+ Druids.newScanQueryBuilder()
+ .dataSource(DATA_SOURCE)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .virtualColumns(
+ new ExpressionVirtualColumn(
+ "v0",
+
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
+ ColumnType.NESTED_DATA,
+ queryFramework().macroTable()
+ ),
+ new NestedFieldVirtualColumn(
+ "nest",
+ "v1",
+ ColumnType.STRING,
+ ImmutableList.of(
+ new NestedPathField("x")
+ ),
+ false,
+ null,
+ false
+ )
+ )
+ .columns("v0")
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"},
+ new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"},
+ new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+ new Object[]{"{\"a\":\"x\",\"x\":null}"}
+ ),
+ RowSignature.builder()
+ .add("EXPR$0", ColumnType.NESTED_DATA)
+ .build()
+ );
+ }
+
@Test
public void testCompositionTyping()
{
diff --git a/website/.spelling b/website/.spelling
index 8175755f804..894cd40d959 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -384,6 +384,7 @@ json_paths
json_query
json_query_array
json_value
+json_merge
karlkfi
kbps
kerberos
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]