TAJO-1529: Implement json_extract_path_text(string, string) function. Closes #512
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/d160f6e1 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/d160f6e1 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/d160f6e1 Branch: refs/heads/index_support Commit: d160f6e189d4de6275e169a3e0fd74d9647481c4 Parents: 633109a Author: Jinho Kim <[email protected]> Authored: Tue Apr 7 15:43:19 2015 +0900 Committer: Jinho Kim <[email protected]> Committed: Tue Apr 7 15:43:19 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 + tajo-core/pom.xml | 8 ++ .../function/json/JsonExtractPathText.java | 89 ++++++++++++++++++++ .../tajo/engine/function/TestJsonFunctions.java | 36 ++++++++ tajo-docs/src/main/sphinx/functions.rst | 3 +- .../src/main/sphinx/functions/json_func.rst | 16 ++++ tajo-project/pom.xml | 10 +++ tajo-storage/tajo-storage-hdfs/pom.xml | 1 - .../tajo/storage/json/JsonLineDeserializer.java | 26 +----- .../tajo/storage/json/JsonLineSerializer.java | 3 +- .../text/TextFieldSerializerDeserializer.java | 12 +-- 11 files changed, 176 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index c2016ab..d59a205 100644 --- a/CHANGES +++ b/CHANGES @@ -163,6 +163,9 @@ Release 0.11.0 - unreleased SUB TASKS + TAJO-1529: Implement json_extract_path_text(string, string) function. + (jinho) + TAJO-1338: Defines RESTful API for Clients. (jihun) TAJO-1284: Add alter partition method to CatalogStore. (jaehwa) http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-core/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-core/pom.xml b/tajo-core/pom.xml index 19c9ba3..cce6aaa 100644 --- a/tajo-core/pom.xml +++ b/tajo-core/pom.xml @@ -462,6 +462,14 @@ <artifactId>jcip-annotations</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>net.minidev</groupId> + <artifactId>json-smart</artifactId> + </dependency> + <dependency> + <groupId>com.jayway.jsonpath</groupId> + <artifactId>json-path</artifactId> + </dependency> </dependencies> <profiles> http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java new file mode 100644 index 0000000..27d342e --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import com.jayway.jsonpath.JsonPath; +import net.minidev.json.JSONObject; +import net.minidev.json.parser.JSONParser; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.plan.function.GeneralFunction; +import org.apache.tajo.storage.Tuple; + +/** + * json_extract_path_text(string, string) - + * Extracts JSON string from a JSON string based on json path specified, + * and returns JSON string pointed to by xPath. + * + * + * Returns null if either argument is null. + * + * Example: + * SELECT json_extract_path_text('{"sample" : {"name" : "tajo"}}','$.sample.name') FROM src LIMIT 1;\n" + * -> result: 'tajo' + */ +@Description( + functionName = "json_extract_path_text", + description = "Returns JSON string pointed to by xPath", + detail = "Extracts JSON string from a JSON string based on json path specified,\n" + + "and returns JSON string pointed to by xPath.", + example = "> SELECT json_extract_path_text('{\"sample\" : {\"name\" : \"tajo\"}}','$.sample.name');\n" + + "tajo", + returnType = TajoDataTypes.Type.TEXT, + paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.TEXT})} +) +public class JsonExtractPathText extends GeneralFunction { + private JSONParser parser; + private JsonPath jsonPath; + + public JsonExtractPathText() { + super(new Column[]{ + new Column("string", TajoDataTypes.Type.TEXT), + new Column("string", TajoDataTypes.Type.TEXT), + }); + parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR); + } + + @Override + public Datum eval(Tuple params) { + Datum json = params.get(0); + Datum xPath = params.get(1); + + if (json instanceof NullDatum || xPath instanceof NullDatum) { + return NullDatum.get(); + } + + // default is JsonSmartMappingProvider + try { + + JSONObject object = (JSONObject) parser.parse(json.asTextBytes()); + if (jsonPath == null) { + jsonPath = JsonPath.compile(xPath.asChars()); + } + return DatumFactory.createText(jsonPath.read(object).toString()); + } catch (Exception e) { + return NullDatum.get(); + } + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java new file mode 100644 index 0000000..89f0439 --- /dev/null +++ b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function; + + +import org.apache.tajo.engine.eval.ExprTestBase; +import org.junit.Test; + +import java.io.IOException; + +public class TestJsonFunctions extends ExprTestBase { + static final String JSON_DOCUMENT = "{\"map\" : {\"name\" : \"tajo\"}, \"array\" : [1,2,3]}"; + + @Test + public void testJsonExtractPathText() throws IOException { + testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', '$.map.name') ", new String[]{"tajo"}); + testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', '$.array[1]') ", new String[]{"2"}); + + } +} http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-docs/src/main/sphinx/functions.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/functions.rst b/tajo-docs/src/main/sphinx/functions.rst index fb93d1e..453edf4 100644 --- a/tajo-docs/src/main/sphinx/functions.rst +++ b/tajo-docs/src/main/sphinx/functions.rst @@ -8,4 +8,5 @@ Functions functions/math_func_and_operators functions/string_func_and_operators functions/datetime_func_and_operators - functions/network_func_and_operators \ No newline at end of file + functions/network_func_and_operators + functions/json_func \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-docs/src/main/sphinx/functions/json_func.rst ---------------------------------------------------------------------- diff --git a/tajo-docs/src/main/sphinx/functions/json_func.rst b/tajo-docs/src/main/sphinx/functions/json_func.rst new file mode 100644 index 0000000..5bf5814 --- /dev/null +++ b/tajo-docs/src/main/sphinx/functions/json_func.rst @@ -0,0 +1,16 @@ +******************************* +JSON Functions +******************************* + +.. function:: json_extract_path_text (string json, string xpath) + Extracts JSON string from a JSON string based on json path specified and returns JSON string pointed to by xPath + + :param string: + :param string: + :rtype: text + :example: + + .. code-block:: sql + + json_extract_path_text('{"test" : {"key" : "tajo"}}','$.test.key'); + > tajo http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-project/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml index e2b0a0e..65fbaa3 100644 --- a/tajo-project/pom.xml +++ b/tajo-project/pom.xml @@ -1099,6 +1099,16 @@ <artifactId>javax.ws.rs-api</artifactId> <version>2.0.1</version> </dependency> + <dependency> + <groupId>net.minidev</groupId> + <artifactId>json-smart</artifactId> + <version>2.1.1</version> + </dependency> + <dependency> + <groupId>com.jayway.jsonpath</groupId> + <artifactId>json-path</artifactId> + <version>2.0.0</version> + </dependency> </dependencies> </dependencyManagement> <profiles> http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/pom.xml ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/pom.xml b/tajo-storage/tajo-storage-hdfs/pom.xml index 33db33b..33b3bc7 100644 --- a/tajo-storage/tajo-storage-hdfs/pom.xml +++ b/tajo-storage/tajo-storage-hdfs/pom.xml @@ -351,7 +351,6 @@ limitations under the License. <dependency> <groupId>net.minidev</groupId> <artifactId>json-smart</artifactId> - <version>2.0</version> </dependency> </dependencies> http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java index a7e02a4..204f607 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java @@ -20,10 +20,10 @@ package org.apache.tajo.storage.json; import io.netty.buffer.ByteBuf; -import net.minidev.json.JSONArray; import net.minidev.json.JSONObject; import net.minidev.json.parser.JSONParser; import net.minidev.json.parser.ParseException; +import org.apache.commons.net.util.Base64; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.catalog.TableMeta; @@ -37,7 +37,6 @@ import org.apache.tajo.storage.text.TextLineDeserializer; import org.apache.tajo.storage.text.TextLineParsingError; import java.io.IOException; -import java.util.Iterator; public class JsonLineDeserializer extends TextLineDeserializer { private JSONParser parser; @@ -174,31 +173,14 @@ public class JsonLineDeserializer extends TextLineDeserializer { case BINARY: case VARBINARY: case BLOB: { - Object jsonObject = object.get(fieldName); + Object jsonObject = object.getAsString(fieldName); if (jsonObject == null) { output.put(actualIdx, NullDatum.get()); break; } - if (jsonObject instanceof String) { - output.put(actualIdx, DatumFactory.createBlob((String) jsonObject)); - } else if (jsonObject instanceof JSONArray) { - JSONArray jsonArray = (JSONArray) jsonObject; - byte[] bytes = new byte[jsonArray.size()]; - Iterator<Object> it = jsonArray.iterator(); - int arrayIdx = 0; - while (it.hasNext()) { - bytes[arrayIdx++] = ((Long) it.next()).byteValue(); - } - if (bytes.length > 0) { - output.put(actualIdx, DatumFactory.createBlob(bytes)); - } else { - output.put(actualIdx, NullDatum.get()); - } - break; - } else { - throw new IOException("Unknown json object: " + object.getClass().getSimpleName()); - } + + output.put(actualIdx, DatumFactory.createBlob(Base64.decodeBase64((String) jsonObject))); break; } case INET4: http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java index cd31ada..d6faf2d 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java @@ -20,6 +20,7 @@ package org.apache.tajo.storage.json; import net.minidev.json.JSONObject; +import org.apache.commons.net.util.Base64; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.catalog.TableMeta; @@ -106,7 +107,7 @@ public class JsonLineSerializer extends TextLineSerializer { case BINARY: case BLOB: case VARBINARY: - jsonObject.put(fieldName, input.getBytes(i)); + jsonObject.put(fieldName, Base64.encodeBase64String(input.getBytes(i))); break; case NULL_TYPE: http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java index ae7565d..e637c7f 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java @@ -26,11 +26,11 @@ import org.apache.tajo.TajoConstants; import org.apache.tajo.catalog.Column; import org.apache.tajo.catalog.TableMeta; import org.apache.tajo.common.TajoDataTypes; -import org.apache.tajo.conf.TajoConf; import org.apache.tajo.datum.*; import org.apache.tajo.datum.protobuf.ProtobufJsonFormat; import org.apache.tajo.storage.FieldSerializerDeserializer; import org.apache.tajo.storage.StorageConstants; +import org.apache.tajo.util.Bytes; import org.apache.tajo.util.NumberUtil; import java.io.IOException; @@ -39,8 +39,8 @@ import java.nio.charset.CharsetDecoder; import java.util.TimeZone; public class TextFieldSerializerDeserializer implements FieldSerializerDeserializer { - public static final byte[] trueBytes = "true".getBytes(); - public static final byte[] falseBytes = "false".getBytes(); + private static final byte[] trueBytes = "true".getBytes(Bytes.UTF8_CHARSET); + private static final byte[] falseBytes = "false".getBytes(Bytes.UTF8_CHARSET); private static ProtobufJsonFormat protobufJsonFormat = ProtobufJsonFormat.getInstance(); private final CharsetDecoder decoder = CharsetUtil.getDecoder(CharsetUtil.UTF_8); @@ -108,7 +108,7 @@ public class TextFieldSerializerDeserializer implements FieldSerializerDeseriali break; case TIME: if (hasTimezone) { - bytes = ((TimeDatum) datum).asChars(timezone, true).getBytes(); + bytes = ((TimeDatum) datum).asChars(timezone, true).getBytes(Bytes.UTF8_CHARSET); } else { bytes = datum.asTextBytes(); } @@ -117,7 +117,7 @@ public class TextFieldSerializerDeserializer implements FieldSerializerDeseriali break; case TIMESTAMP: if (hasTimezone) { - bytes = ((TimestampDatum) datum).asChars(timezone, true).getBytes(); + bytes = ((TimestampDatum) datum).asChars(timezone, true).getBytes(Bytes.UTF8_CHARSET); } else { bytes = datum.asTextBytes(); } @@ -132,7 +132,7 @@ public class TextFieldSerializerDeserializer implements FieldSerializerDeseriali break; case PROTOBUF: ProtobufDatum protobuf = (ProtobufDatum) datum; - byte[] protoBytes = protobufJsonFormat.printToString(protobuf.get()).getBytes(); + byte[] protoBytes = protobufJsonFormat.printToString(protobuf.get()).getBytes(Bytes.UTF8_CHARSET); length = protoBytes.length; out.write(protoBytes, 0, protoBytes.length); break;
