Repository: tajo Updated Branches: refs/heads/master 903151ead -> 9114485b5
TAJO-1754: Implement several functions to handle json array. Closes #687 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/9114485b Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/9114485b Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/9114485b Branch: refs/heads/master Commit: 9114485b594b89616f240267fc3b3cf5d2a02370 Parents: 903151e Author: Jihoon Son <[email protected]> Authored: Thu Aug 13 22:33:19 2015 +0900 Committer: Jihoon Son <[email protected]> Committed: Thu Aug 13 22:33:19 2015 +0900 ---------------------------------------------------------------------- CHANGES | 2 + .../engine/function/json/JsonArrayContains.java | 103 +++++++++++++++++++ .../tajo/engine/function/json/JsonArrayGet.java | 82 +++++++++++++++ .../engine/function/json/JsonArrayLength.java | 67 ++++++++++++ .../function/json/JsonExtractPathText.java | 12 +-- .../function/json/ScalarJsonFunction.java | 38 +++++++ .../tajo/engine/function/TestJsonFunctions.java | 31 ++++++ .../src/main/sphinx/functions/json_func.rst | 55 +++++++++- 8 files changed, 377 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 4cdd3bd..3498cc3 100644 --- a/CHANGES +++ b/CHANGES @@ -474,6 +474,8 @@ Release 0.11.0 - unreleased SUB TASKS + TAJO-1754: Implement several functions to handle json array. (jihoon) + TAJO-1748: Refine client APIs to throw specific exceptions. (hyunsik) TAJO-1735: Implement MetadataProvider and LinkedMetadataManager. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java new file mode 100644 index 0000000..c0287a9 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.JSONArray; +import net.minidev.json.parser.ParseException; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.datum.AnyDatum; +import org.apache.tajo.datum.BooleanDatum; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.storage.Tuple; + +@Description( + functionName = "json_array_contains", + description = "Determine if the given value exists in the JSON array", + detail = "Determine if the given value exists in the JSON array.", + example = "> SELECT json_array_contains('[100, 200, 300]', 100);\n" + + "t", + returnType = Type.BOOLEAN, + paramTypes = {@ParamTypes(paramTypes = {Type.TEXT, Type.ANY})} +) +public class JsonArrayContains extends ScalarJsonFunction { + private boolean isLong; + private boolean isDouble; + private boolean isFirst = true; + + public JsonArrayContains() { + super(new Column[]{ + new Column("json_array", TajoDataTypes.Type.TEXT), + new Column("index", TajoDataTypes.Type.ANY) + }); + } + + @Override + public Datum eval(Tuple params) { + if (params.isBlankOrNull(0) || params.isBlankOrNull(1)) { + return NullDatum.get(); + } + + try { + Object parsed = parser.parse(params.getBytes(0)); + if (parsed instanceof JSONArray) { + JSONArray array = (JSONArray) parsed; + Datum actualParam = ((AnyDatum)params.asDatum(1)).getActual(); + + if (isFirst) { + isFirst = false; + if (actualParam.type() == TajoDataTypes.Type.INT1 || + actualParam.type() == TajoDataTypes.Type.INT2 || + actualParam.type() == TajoDataTypes.Type.INT4 || + actualParam.type() == TajoDataTypes.Type.INT8) { + isLong = true; + } else if (actualParam.type() == TajoDataTypes.Type.FLOAT4 || + actualParam.type() == TajoDataTypes.Type.FLOAT8) { + isDouble = true; + } + } + + for (Object eachElem : array) { + if (isLong && eachElem instanceof Long) { + if (actualParam.asInt8() == (Long)eachElem) { + return BooleanDatum.TRUE; + } + } else if (isDouble && eachElem instanceof Double) { + if (actualParam.asFloat8() == (Double)eachElem) { + return BooleanDatum.TRUE; + } + } else { + if (actualParam.asChars().equals(eachElem.toString())) { + return BooleanDatum.TRUE; + } + } + } + return BooleanDatum.FALSE; + } else { + return NullDatum.get(); + } + } catch (ParseException e) { + return NullDatum.get(); + } + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java new file mode 100644 index 0000000..412337e --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.JSONArray; +import net.minidev.json.parser.ParseException; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.storage.Tuple; + +@Description( + functionName = "json_array_get", + description = "Returns the element at the specified index into the JSON array", + detail = "Returns the element at the specified index into the JSON array.\n " + + "This function returns an element indexed from the end of an array with a negative index,\n " + + "and null if the element at the specified index doesnât exist.", + example = "> SELECT json_array_get('[100, 200, 300]', 1);\n" + + "200", + returnType = TajoDataTypes.Type.TEXT, + paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.INT4})} +) +public class JsonArrayGet extends ScalarJsonFunction { + private int realIndex = Integer.MAX_VALUE; + + public JsonArrayGet() { + super(new Column[]{ + new Column("json_array", TajoDataTypes.Type.TEXT), + new Column("index", TajoDataTypes.Type.INT4) + }); + } + + @Override + public Datum eval(Tuple params) { + if (params.isBlankOrNull(0) || params.isBlankOrNull(1)) { + return NullDatum.get(); + } + + try { + Object parsed = parser.parse(params.getBytes(0)); + if (parsed instanceof JSONArray) { + JSONArray array = (JSONArray) parsed; + if (realIndex == Integer.MAX_VALUE) { + int givenIndex = params.getInt4(1); + // Zero and positive given index points out the element from the left side, + // while negative given index points out the element from the right side. + realIndex = givenIndex < 0 ? array.size() + givenIndex : givenIndex; + } + + if (realIndex >= array.size() || realIndex < 0) { + return NullDatum.get(); + } else { + return DatumFactory.createText(array.get(realIndex).toString()); + } + } else { + return NullDatum.get(); + } + } catch (ParseException e) { + return NullDatum.get(); + } + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java new file mode 100644 index 0000000..ff0a05a --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.JSONArray; +import net.minidev.json.parser.ParseException; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.storage.Tuple; + +@Description( + functionName = "json_array_length", + description = "Returns the length of json array", + detail = "Returns the length of json array.", + example = "> SELECT json_array_length('[100, 200, 300]');\n" + + "3", + returnType = Type.INT8, + paramTypes = {@ParamTypes(paramTypes = {Type.TEXT})} +) +public class JsonArrayLength extends ScalarJsonFunction { + + public JsonArrayLength() { + super(new Column[]{ + new Column("string", TajoDataTypes.Type.TEXT), + }); + } + + @Override + public Datum eval(Tuple params) { + if (params.isBlankOrNull(0)) { + return NullDatum.get(); + } + try { + Object parsed = parser.parse(params.getBytes(0)); + if (parsed instanceof JSONArray) { + JSONArray array = (JSONArray) parsed; + return DatumFactory.createInt8(array.size()); + } else { + return NullDatum.get(); + } + } catch (ParseException e) { + return NullDatum.get(); + } + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java index 84c14c6..ced7789 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java @@ -20,7 +20,6 @@ package org.apache.tajo.engine.function.json; import com.jayway.jsonpath.JsonPath; import net.minidev.json.JSONObject; -import net.minidev.json.parser.JSONParser; import org.apache.tajo.catalog.Column; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.Datum; @@ -28,13 +27,12 @@ import org.apache.tajo.datum.DatumFactory; import org.apache.tajo.datum.NullDatum; import org.apache.tajo.engine.function.annotation.Description; import org.apache.tajo.engine.function.annotation.ParamTypes; -import org.apache.tajo.plan.function.GeneralFunction; import org.apache.tajo.storage.Tuple; /** * json_extract_path_text(string, string) - * Extracts JSON string from a JSON string based on json path specified, - * and returns JSON string pointed to by xPath. + * and returns JSON string pointed to by JSONPath. * * * Returns null if either argument is null. @@ -45,16 +43,15 @@ import org.apache.tajo.storage.Tuple; */ @Description( functionName = "json_extract_path_text", - description = "Returns JSON string pointed to by xPath", + description = "Returns JSON string pointed to by JSONPath", detail = "Extracts JSON string from a JSON string based on json path specified,\n" - + "and returns JSON string pointed to by xPath.", + + "and returns JSON string pointed to by JSONPath.", example = "> SELECT json_extract_path_text('{\"sample\" : {\"name\" : \"tajo\"}}','$.sample.name');\n" + "tajo", returnType = TajoDataTypes.Type.TEXT, paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.TEXT})} ) -public class JsonExtractPathText extends GeneralFunction { - private JSONParser parser; +public class JsonExtractPathText extends ScalarJsonFunction { private JsonPath jsonPath; public JsonExtractPathText() { @@ -62,7 +59,6 @@ public class JsonExtractPathText extends GeneralFunction { new Column("string", TajoDataTypes.Type.TEXT), new Column("string", TajoDataTypes.Type.TEXT), }); - parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java new file mode 100644 index 0000000..db95aaa --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.parser.JSONParser; +import org.apache.tajo.OverridableConf; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.plan.expr.FunctionEval; +import org.apache.tajo.plan.function.GeneralFunction; + +public abstract class ScalarJsonFunction extends GeneralFunction { + protected JSONParser parser; + + public ScalarJsonFunction(Column[] definedArgs) { + super(definedArgs); + } + + @Override + public void init(OverridableConf queryContext, FunctionEval.ParamType [] paramTypes) { + parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR); + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java index 57248a8..1792075 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java @@ -25,11 +25,42 @@ import org.junit.Test; public class TestJsonFunctions extends ExprTestBase { static final String JSON_DOCUMENT = "{\"map\" : {\"name\" : \"tajo\"}, \"array\" : [1,2,3]}"; + static final String JSON_ARRAY = "[100,200,300,400,500]"; + static final String JSON_COMPLEX_ARRAY = "[100, \"test\", \"2015-08-13 11:58:59\", 0.899999999999]"; + static final String JSON_EMPTY_ARRAY = "[]"; @Test public void testJsonExtractPathText() throws TajoException { testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', '$.map.name') ", new String[]{"tajo"}); testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', '$.array[1]') ", new String[]{"2"}); + } + + @Test + public void testJsonArrayGet() throws TajoException { + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', 0)", new String[]{"100"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', 2)", new String[]{"300"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', -1)", new String[]{"500"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', -2)", new String[]{"400"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', 10)", new String[]{""}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', -10)", new String[]{""}); + testSimpleEval("select json_array_get('" + JSON_EMPTY_ARRAY + "', 0)", new String[]{""}); + } + @Test + public void testJsonArrayContains() throws TajoException { + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 100)", new String[]{"t"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 'test')", new String[]{"t"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', '2015-08-13 11:58:59'::timestamp)", + new String[]{"t"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', '2015-08-13 11:58:59'::date)", + new String[]{"f"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 1000)", new String[]{"f"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 0.899999999999)", new String[]{"t"}); + } + + @Test + public void testJsonArrayLength() throws TajoException { + testSimpleEval("select json_array_length('" + JSON_ARRAY + "')", new String[]{"5"}); + testSimpleEval("select json_array_length('" + JSON_EMPTY_ARRAY + "')", new String[]{"0"}); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/9114485b/tajo-docs/src/main/sphinx/functions/json_func.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/functions/json_func.rst b/tajo-docs/src/main/sphinx/functions/json_func.rst index d05cc1b..904be5b 100644 --- a/tajo-docs/src/main/sphinx/functions/json_func.rst +++ b/tajo-docs/src/main/sphinx/functions/json_func.rst @@ -2,16 +2,61 @@ JSON Functions ******************************* -.. function:: json_extract_path_text (string json, string xpath) +.. function:: json_extract_path_text (string json, string json_path) - Extracts JSON string from a JSON string based on json path specified and returns JSON string pointed to by xPath + Extracts JSON string from a JSON string based on json path specified and returns JSON string pointed to by JSONPath. + Returns null if either argument is null. - :param string: - :param string: + :param json: JSON string + :param json_path: JSONpath :rtype: text :example: .. code-block:: sql - json_extract_path_text('{"test" : {"key" : "tajo"}}','$.test.key'); + select json_extract_path_text('{"test" : {"key" : "tajo"}}','$.test.key'); > tajo + +.. function:: json_array_get (string json_array, int index) + + Returns the element at the specified index into the JSON array. This function returns an element indexed from the end of an array with a negative index, and null if the element at the specified index doesnât exist. + + :param json_array: String of a JSON array + :param index: index + :rtype: text + :example: + + .. code-block:: sql + + select json_array_get('[100, 200, 300]', 0); + > 100 + + select json_array_get('[100, 200, 300]', -2); + > 200 + +.. function:: json_array_contains (string json_array, any value) + + Determine if the given value exists in the JSON array. + + :param json_array: String of a JSON array + :param value: value of any type + :rtype: text + :example: + + .. code-block:: sql + + select json_array_contains('[100, 200, 300]', 100); + > t + +.. function:: json_array_length(string json_array) + + Returns the length of json array. + + :param json_array: String of a JSON array + :rtype: int8 + :example: + + .. code-block:: sql + + select json_array_length('[100, 200, 300]'); + > 3
