This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 307b8e33571 feat: json_merge expression and sql function (#17081)
307b8e33571 is described below

commit 307b8e33571ceabe767f37d12514895def6bedd0
Author: Lasse Mammen <[email protected]>
AuthorDate: Tue Sep 17 13:57:34 2024 +0100

    feat: json_merge expression and sql function (#17081)
---
 docs/querying/math-expr.md                         |   1 +
 docs/querying/sql-functions.md                     |   7 ++
 docs/querying/sql-json-functions.md                |   1 +
 .../org/apache/druid/guice/ExpressionModule.java   |   1 +
 .../query/expression/NestedDataExpressions.java    | 112 +++++++++++++++++++++
 .../expression/NestedDataExpressionsTest.java      |  58 +++++++++++
 .../builtin/NestedDataOperatorConversions.java     |  46 +++++++++
 .../sql/calcite/planner/DruidOperatorTable.java    |   1 +
 .../sql/calcite/CalciteNestedDataQueryTest.java    |  50 +++++++++
 website/.spelling                                  |   1 +
 10 files changed, 278 insertions(+)

diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md
index 38ced649c06..0893fc4e236 100644
--- a/docs/querying/math-expr.md
+++ b/docs/querying/math-expr.md
@@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform, 
and create `COMPLEX<jso
 | to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
 | json_keys(expr, path) | Get array of field names from `expr` at the 
specified JSONPath `path`, or null if the data does not exist or have any 
fields |
 | json_paths(expr) | Get array of all JSONPath paths available from `expr` |
+| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or 
`COMPLEX<json>` into one. Preserves the rightmost value when there are key 
overlaps. |
 
 ### JSONPath syntax
 
diff --git a/docs/querying/sql-functions.md b/docs/querying/sql-functions.md
index 5cdcbb25495..6859ca67391 100644
--- a/docs/querying/sql-functions.md
+++ b/docs/querying/sql-functions.md
@@ -855,6 +855,13 @@ Returns true if the IPv6 `address` belongs to the `subnet` 
literal, else false.
 
 Returns an array of field names from `expr` at the specified `path`.
 
+## JSON_MERGE
+
+**Function type:** [JSON](sql-json-functions.md)
+
+`JSON_MERGE(expr1, expr2[, expr3 ...])`
+Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Preserves the 
rightmost value when there are key overlaps. Returning always a `COMPLEX<json>` 
type.
+
 ## JSON_OBJECT
 
 **Function type:** [JSON](sql-json-functions.md)
diff --git a/docs/querying/sql-json-functions.md 
b/docs/querying/sql-json-functions.md
index 199c568c29b..35b4f5e3769 100644
--- a/docs/querying/sql-json-functions.md
+++ b/docs/querying/sql-json-functions.md
@@ -38,6 +38,7 @@ You can use the following JSON functions to extract, 
transform, and create `COMP
 | --- | --- |
 |`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the 
specified `path`.|
 |`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | 
Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to 
string types. The `VALUE` expressions can be composed of any input type, 
including other `COMPLEX<json>` values. `JSON_OBJECT` can accept 
colon-separated key-value pairs. The following syntax is equivalent: 
`JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
+|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or 
`COMPLEX<json>` into one. Preserves the rightmost value when there are key 
overlaps. Returning always a `COMPLEX<json>` type.|
 |`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal 
values in `expr` in JSONPath format. |
 |`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at 
the specified `path`. |
 |`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from 
`expr` at the specified `path`. If value is not an `ARRAY`, it gets translated 
into a single element `ARRAY` containing the value at `path`. The primary use 
of this function is to extract arrays of objects to use as inputs to other 
[array functions](./sql-array-functions.md).|
diff --git 
a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java 
b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
index 917cf967f14..e1064234e56 100644
--- a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
+++ b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java
@@ -81,6 +81,7 @@ public class ExpressionModule implements Module
                    .add(HyperUniqueExpressions.HllEstimateExprMacro.class)
                    .add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class)
                    .add(NestedDataExpressions.JsonObjectExprMacro.class)
+                   .add(NestedDataExpressions.JsonMergeExprMacro.class)
                    .add(NestedDataExpressions.JsonKeysExprMacro.class)
                    .add(NestedDataExpressions.JsonPathsExprMacro.class)
                    .add(NestedDataExpressions.JsonValueExprMacro.class)
diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
 
b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
index 873b4f83188..0926ce78e0a 100644
--- 
a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
+++ 
b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java
@@ -22,6 +22,7 @@ package org.apache.druid.query.expression;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
 import org.apache.druid.guice.annotations.Json;
 import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprEval;
@@ -99,6 +100,117 @@ public class NestedDataExpressions
     }
   }
 
+  public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro
+  {
+    public static final String NAME = "json_merge";
+
+    private final ObjectMapper jsonMapper;
+
+    @Inject
+    public JsonMergeExprMacro(
+        @Json ObjectMapper jsonMapper
+    )
+    {
+      this.jsonMapper = jsonMapper;
+    }
+
+    @Override
+    public String name()
+    {
+      return NAME;
+    }
+
+    @Override
+    public Expr apply(List<Expr> args)
+    {
+      if (args.size() < 2) {
+        throw validationFailed("must have at least two arguments");
+      }
+
+      final class ParseJsonExpr extends 
ExprMacroTable.BaseScalarMacroFunctionExpr
+      {
+        public ParseJsonExpr(List<Expr> args)
+        {
+          super(JsonMergeExprMacro.this, args);
+        }
+
+        @Override
+        public ExprEval eval(ObjectBinding bindings)
+        {
+          ExprEval arg = args.get(0).eval(bindings);
+          Object obj;
+
+          if (arg.value() == null) {
+            throw JsonMergeExprMacro.this.validationFailed(
+              "invalid input expected %s but got %s instead",
+                ExpressionType.STRING,
+                arg.type()
+            );
+          }
+
+          try {
+            obj = jsonMapper.readValue(getArgAsJson(arg), Object.class);
+          }
+          catch (JsonProcessingException e) {
+            throw JsonMergeExprMacro.this.processingFailed(e, "bad string 
input [%s]", arg.asString());
+          }
+
+          ObjectReader updater = jsonMapper.readerForUpdating(obj);
+
+          for (int i = 1; i < args.size(); i++) {
+            ExprEval argSub = args.get(i).eval(bindings);
+            
+            try {
+              String str = getArgAsJson(argSub);
+              if (str != null) {
+                obj = updater.readValue(str);
+              }
+            }
+            catch (JsonProcessingException e) {
+              throw JsonMergeExprMacro.this.processingFailed(e, "bad string 
input [%s]", argSub.asString());
+            }
+          }
+
+          return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj);
+        }
+
+        @Nullable
+        @Override
+        public ExpressionType getOutputType(InputBindingInspector inspector)
+        {
+          return ExpressionType.NESTED_DATA;
+        }
+
+        private String getArgAsJson(ExprEval arg)
+        {
+          if (arg.value() == null) {
+            return null;
+          }
+
+          if (arg.type().is(ExprType.STRING)) {
+            return arg.asString();
+          } 
+          
+          if (arg.type().is(ExprType.COMPLEX)) {
+            try {
+              return jsonMapper.writeValueAsString(unwrap(arg));
+            }
+            catch (JsonProcessingException e) {
+              throw JsonMergeExprMacro.this.processingFailed(e, "bad complex 
input [%s]", arg.asString());
+            } 
+          } 
+          
+          throw JsonMergeExprMacro.this.validationFailed(
+            "invalid input expected %s but got %s instead",
+            ExpressionType.STRING,
+            arg.type()
+          );
+        }
+      }
+      return new ParseJsonExpr(args);
+    }
+  }
+
   public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro
   {
     public static final String NAME = "to_json_string";
diff --git 
a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
 
b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
index b14edb2d17b..c9fe553469a 100644
--- 
a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java
@@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends 
InitializedNullHandlingTest
           new NestedDataExpressions.JsonPathsExprMacro(),
           new NestedDataExpressions.JsonKeysExprMacro(),
           new NestedDataExpressions.JsonObjectExprMacro(),
+          new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER),
           new NestedDataExpressions.JsonValueExprMacro(),
           new NestedDataExpressions.JsonQueryExprMacro(),
           new NestedDataExpressions.JsonQueryArrayExprMacro(),
@@ -112,6 +113,63 @@ public class NestedDataExpressionsTest extends 
InitializedNullHandlingTest
     Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) 
eval.value()).get("y"));
   }
 
+  @Test
+  public void testJsonMergeExpression() throws JsonProcessingException
+  {
+    Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')", 
MACRO_TABLE);
+    ExprEval eval = expr.eval(inputBindings);
+    Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+    expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertEquals("{\"a\":\"x\"}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+    expr = 
Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')", 
MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+    expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b', 
'y'))", MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+    expr = 
Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))",
 MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertEquals("{\"a\":\"y\"}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+
+    expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')", 
MACRO_TABLE);
+    eval = expr.eval(inputBindings);
+    Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+  }
+
+  @Test
+  public void testJsonMergeOverflow() throws JsonProcessingException
+  {
+    Expr.ObjectBinding input1 = InputBindings.forInputSuppliers(
+        new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
+          .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, 
() -> ImmutableMap.of("key", "blah", "value", "blahblah")))
+          .build()
+    );
+    Expr.ObjectBinding input2 = InputBindings.forInputSuppliers(
+        new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
+          .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, 
() -> ImmutableMap.of("key", "blah2", "value", "blahblah2")))
+          .build()
+    );
+
+    Expr expr = Parser.parse("json_merge(json_object(), 
json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))", 
MACRO_TABLE);
+    ExprEval eval = expr.eval(input1);
+    Assert.assertEquals("{\"blah\":\"blahblah\"}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+    eval = expr.eval(input2);
+    Assert.assertEquals("{\"blah2\":\"blahblah2\"}", 
JSON_MAPPER.writeValueAsString(eval.value()));
+    Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
+  }
+
   @Test
   public void testJsonKeysExpression()
   {
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
index a6006046553..9c6cfb0448b 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java
@@ -780,6 +780,52 @@ public class NestedDataOperatorConversions
     }
   }
 
+  public static class JsonMergeOperatorConversion implements 
SqlOperatorConversion
+  {
+    private static final String FUNCTION_NAME = "json_merge";
+    private static final SqlFunction SQL_FUNCTION = OperatorConversions
+        .operatorBuilder(FUNCTION_NAME)
+        
.operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1)))
+        .operandTypeInference((callBinding, returnType, operandTypes) -> {
+          RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
+          for (int i = 0; i < operandTypes.length; i++) {
+            operandTypes[i] = typeFactory.createTypeWithNullability(
+                typeFactory.createSqlType(SqlTypeName.ANY),
+                true
+            );
+          }
+        })
+        .returnTypeInference(NESTED_RETURN_TYPE_INFERENCE)
+        .functionCategory(SqlFunctionCategory.SYSTEM)
+        .build();
+
+    @Override
+    public SqlOperator calciteOperator()
+    {
+      return SQL_FUNCTION;
+    }
+
+    @Nullable
+    @Override
+    public DruidExpression toDruidExpression(
+        PlannerContext plannerContext,
+        RowSignature rowSignature,
+        RexNode rexNode
+    )
+    {
+      return OperatorConversions.convertCall(
+          plannerContext,
+          rowSignature,
+          rexNode,
+          druidExpressions -> DruidExpression.ofExpression(
+              ColumnType.NESTED_DATA,
+              DruidExpression.functionCall("json_merge"),
+              druidExpressions
+          )
+      );
+    }
+  }
+
   public static class ToJsonStringOperatorConversion implements 
SqlOperatorConversion
   {
     private static final String FUNCTION_NAME = "to_json_string";
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
index 27efe16270e..0fb1c9fb9ff 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java
@@ -353,6 +353,7 @@ public class DruidOperatorTable implements SqlOperatorTable
                    .add(new 
NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion())
                    .add(new 
NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion())
                    .add(new 
NestedDataOperatorConversions.JsonObjectOperatorConversion())
+                   .add(new 
NestedDataOperatorConversions.JsonMergeOperatorConversion())
                    .add(new 
NestedDataOperatorConversions.ToJsonStringOperatorConversion())
                    .add(new 
NestedDataOperatorConversions.ParseJsonOperatorConversion())
                    .add(new 
NestedDataOperatorConversions.TryParseJsonOperatorConversion())
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
index 88ad05a4a27..b03b6698b1d 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java
@@ -69,6 +69,7 @@ import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.join.JoinableFactoryWrapper;
+import org.apache.druid.segment.nested.NestedPathField;
 import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
 import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
 import 
org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
@@ -4920,6 +4921,55 @@ public class CalciteNestedDataQueryTest extends 
BaseCalciteQueryTest
     );
   }
 
+  @Test
+  public void testJsonMerging()
+  {
+    testQuery(
+        "SELECT "
+        + "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE 
JSON_VALUE(nest, '$.x')))\n"
+        + "FROM druid.nested",
+        ImmutableList.of(
+            Druids.newScanQueryBuilder()
+                  .dataSource(DATA_SOURCE)
+                  .intervals(querySegmentSpec(Filtration.eternity()))
+                  .virtualColumns(
+                      new ExpressionVirtualColumn(
+                          "v0",
+                          
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
+                          ColumnType.NESTED_DATA,
+                          queryFramework().macroTable()
+                      ),
+                      new NestedFieldVirtualColumn(
+                          "nest",
+                          "v1",
+                          ColumnType.STRING,
+                          ImmutableList.of(
+                            new NestedPathField("x")
+                          ),
+                          false,
+                          null,
+                          false
+                      )
+                  )
+                  .columns("v0")
+                  
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                  .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"},
+            new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"},
+            new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
+            new Object[]{"{\"a\":\"x\",\"x\":null}"}
+        ),
+        RowSignature.builder()
+                    .add("EXPR$0", ColumnType.NESTED_DATA)
+                    .build()
+    );
+  }
+
   @Test
   public void testCompositionTyping()
   {
diff --git a/website/.spelling b/website/.spelling
index 8175755f804..894cd40d959 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -384,6 +384,7 @@ json_paths
 json_query
 json_query_array
 json_value
+json_merge
 karlkfi
 kbps
 kerberos


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to