TAJO-1529: Implement json_extract_path_text(string, string) function.

Closes #512


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/d160f6e1
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/d160f6e1
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/d160f6e1

Branch: refs/heads/index_support
Commit: d160f6e189d4de6275e169a3e0fd74d9647481c4
Parents: 633109a
Author: Jinho Kim <[email protected]>
Authored: Tue Apr 7 15:43:19 2015 +0900
Committer: Jinho Kim <[email protected]>
Committed: Tue Apr 7 15:43:19 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 +
 tajo-core/pom.xml                               |  8 ++
 .../function/json/JsonExtractPathText.java      | 89 ++++++++++++++++++++
 .../tajo/engine/function/TestJsonFunctions.java | 36 ++++++++
 tajo-docs/src/main/sphinx/functions.rst         |  3 +-
 .../src/main/sphinx/functions/json_func.rst     | 16 ++++
 tajo-project/pom.xml                            | 10 +++
 tajo-storage/tajo-storage-hdfs/pom.xml          |  1 -
 .../tajo/storage/json/JsonLineDeserializer.java | 26 +-----
 .../tajo/storage/json/JsonLineSerializer.java   |  3 +-
 .../text/TextFieldSerializerDeserializer.java   | 12 +--
 11 files changed, 176 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index c2016ab..d59a205 100644
--- a/CHANGES
+++ b/CHANGES
@@ -163,6 +163,9 @@ Release 0.11.0 - unreleased
 
   SUB TASKS
 
+    TAJO-1529: Implement json_extract_path_text(string, string) function.
+    (jinho)
+
     TAJO-1338: Defines RESTful API for Clients. (jihun)
 
     TAJO-1284: Add alter partition method to CatalogStore. (jaehwa)

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-core/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-core/pom.xml b/tajo-core/pom.xml
index 19c9ba3..cce6aaa 100644
--- a/tajo-core/pom.xml
+++ b/tajo-core/pom.xml
@@ -462,6 +462,14 @@
       <artifactId>jcip-annotations</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>net.minidev</groupId>
+      <artifactId>json-smart</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.jayway.jsonpath</groupId>
+      <artifactId>json-path</artifactId>
+    </dependency>
   </dependencies>
 
   <profiles>

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java
----------------------------------------------------------------------
diff --git 
a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java
 
b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java
new file mode 100644
index 0000000..27d342e
--- /dev/null
+++ 
b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.json;
+
+import com.jayway.jsonpath.JsonPath;
+import net.minidev.json.JSONObject;
+import net.minidev.json.parser.JSONParser;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+import org.apache.tajo.plan.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+/**
+ * json_extract_path_text(string, string) -
+ * Extracts the value addressed by a JSON path from a JSON string
+ * and returns it as text.
+ *
+ * Returns null if either argument is null, or if the JSON text cannot be
+ * parsed as a JSON object or the path cannot be evaluated against it.
+ *
+ * Example:
+ * SELECT json_extract_path_text('{"sample" : {"name" : "tajo"}}','$.sample.name') FROM src LIMIT 1;
+ * -> result: 'tajo'
+ */
+@Description(
+    functionName = "json_extract_path_text",
+    description = "Returns JSON string pointed to by xPath",
+    detail = "Extracts JSON string from a JSON string based on json path specified,\n"
+        + "and returns JSON string pointed to by xPath.",
+    example = "> SELECT json_extract_path_text('{\"sample\" : {\"name\" : \"tajo\"}}','$.sample.name');\n"
+        + "tajo",
+    returnType = TajoDataTypes.Type.TEXT,
+    paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.TEXT})}
+)
+public class JsonExtractPathText extends GeneralFunction {
+  private JSONParser parser;
+  private JsonPath jsonPath;
+  private String compiledPath; // source text of jsonPath; a different path forces a recompile
+
+  public JsonExtractPathText() {
+    super(new Column[]{
+        new Column("string", TajoDataTypes.Type.TEXT),
+        new Column("string", TajoDataTypes.Type.TEXT),
+    });
+    parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR);
+  }
+
+  /** Evaluates the JSON path (second param) against the JSON text (first param). */
+  @Override
+  public Datum eval(Tuple params) {
+    Datum json = params.get(0);
+    Datum xPath = params.get(1);
+    if (json instanceof NullDatum || xPath instanceof NullDatum) {
+      return NullDatum.get();
+    }
+    try {
+      String path = xPath.asChars();
+      if (!path.equals(compiledPath)) { // the path argument may differ from row to row
+        jsonPath = JsonPath.compile(path);
+        compiledPath = path;
+      }
+      Object value = jsonPath.read((JSONObject) parser.parse(json.asTextBytes()));
+      return value == null ? NullDatum.get() : DatumFactory.createText(value.toString());
+    } catch (Exception e) { // best effort: malformed JSON or an unusable path yields NULL
+      return NullDatum.get();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java
----------------------------------------------------------------------
diff --git 
a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java
 
b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java
new file mode 100644
index 0000000..89f0439
--- /dev/null
+++ 
b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function;
+
+
+import org.apache.tajo.engine.eval.ExprTestBase;
+import org.junit.Test;
+
+import java.io.IOException;
+
+public class TestJsonFunctions extends ExprTestBase {
+  static final String JSON_DOCUMENT = "{\"map\" : {\"name\" : \"tajo\"}, 
\"array\" : [1,2,3]}";
+
+  @Test
+  public void testJsonExtractPathText() throws IOException {
+    testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', 
'$.map.name') ", new String[]{"tajo"});
+    testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', 
'$.array[1]') ", new String[]{"2"});
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-docs/src/main/sphinx/functions.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/functions.rst 
b/tajo-docs/src/main/sphinx/functions.rst
index fb93d1e..453edf4 100644
--- a/tajo-docs/src/main/sphinx/functions.rst
+++ b/tajo-docs/src/main/sphinx/functions.rst
@@ -8,4 +8,5 @@ Functions
     functions/math_func_and_operators
     functions/string_func_and_operators
     functions/datetime_func_and_operators
-    functions/network_func_and_operators
\ No newline at end of file
+    functions/network_func_and_operators
+    functions/json_func
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-docs/src/main/sphinx/functions/json_func.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/functions/json_func.rst 
b/tajo-docs/src/main/sphinx/functions/json_func.rst
new file mode 100644
index 0000000..5bf5814
--- /dev/null
+++ b/tajo-docs/src/main/sphinx/functions/json_func.rst
@@ -0,0 +1,16 @@
+*******************************
+JSON Functions
+*******************************
+
+.. function:: json_extract_path_text (string json, string xpath)
+  Extracts the value addressed by the JSON path ``xpath`` from the JSON string ``json`` and returns it as text
+
+  :param json: JSON string to extract from
+  :param xpath: JSON path expression, e.g. ``$.test.key``
+  :rtype: text
+  :example:
+
+  .. code-block:: sql
+
+    json_extract_path_text('{"test" : {"key" : "tajo"}}','$.test.key');
+    > tajo

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-project/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml
index e2b0a0e..65fbaa3 100644
--- a/tajo-project/pom.xml
+++ b/tajo-project/pom.xml
@@ -1099,6 +1099,16 @@
         <artifactId>javax.ws.rs-api</artifactId>
         <version>2.0.1</version>
       </dependency>
+      <dependency>
+        <groupId>net.minidev</groupId>
+        <artifactId>json-smart</artifactId>
+        <version>2.1.1</version>
+      </dependency>
+      <dependency>
+        <groupId>com.jayway.jsonpath</groupId>
+        <artifactId>json-path</artifactId>
+        <version>2.0.0</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
   <profiles>

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/pom.xml 
b/tajo-storage/tajo-storage-hdfs/pom.xml
index 33db33b..33b3bc7 100644
--- a/tajo-storage/tajo-storage-hdfs/pom.xml
+++ b/tajo-storage/tajo-storage-hdfs/pom.xml
@@ -351,7 +351,6 @@ limitations under the License.
     <dependency>
       <groupId>net.minidev</groupId>
       <artifactId>json-smart</artifactId>
-      <version>2.0</version>
     </dependency>
   </dependencies>
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
----------------------------------------------------------------------
diff --git 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
index a7e02a4..204f607 100644
--- 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
+++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
@@ -20,10 +20,10 @@ package org.apache.tajo.storage.json;
 
 
 import io.netty.buffer.ByteBuf;
-import net.minidev.json.JSONArray;
 import net.minidev.json.JSONObject;
 import net.minidev.json.parser.JSONParser;
 import net.minidev.json.parser.ParseException;
+import org.apache.commons.net.util.Base64;
 import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.catalog.SchemaUtil;
 import org.apache.tajo.catalog.TableMeta;
@@ -37,7 +37,6 @@ import org.apache.tajo.storage.text.TextLineDeserializer;
 import org.apache.tajo.storage.text.TextLineParsingError;
 
 import java.io.IOException;
-import java.util.Iterator;
 
 public class JsonLineDeserializer extends TextLineDeserializer {
   private JSONParser parser;
@@ -174,31 +173,14 @@ public class JsonLineDeserializer extends 
TextLineDeserializer {
         case BINARY:
         case VARBINARY:
         case BLOB: {
-          Object jsonObject = object.get(fieldName);
+          Object jsonObject = object.getAsString(fieldName);
 
           if (jsonObject == null) {
             output.put(actualIdx, NullDatum.get());
             break;
           }
-          if (jsonObject instanceof String) {
-            output.put(actualIdx, DatumFactory.createBlob((String) 
jsonObject));
-          } else if (jsonObject instanceof JSONArray) {
-            JSONArray jsonArray = (JSONArray) jsonObject;
-            byte[] bytes = new byte[jsonArray.size()];
-            Iterator<Object> it = jsonArray.iterator();
-            int arrayIdx = 0;
-            while (it.hasNext()) {
-              bytes[arrayIdx++] = ((Long) it.next()).byteValue();
-            }
-            if (bytes.length > 0) {
-              output.put(actualIdx, DatumFactory.createBlob(bytes));
-            } else {
-              output.put(actualIdx, NullDatum.get());
-            }
-            break;
-          } else {
-            throw new IOException("Unknown json object: " + 
object.getClass().getSimpleName());
-          }
+
+          output.put(actualIdx, 
DatumFactory.createBlob(Base64.decodeBase64((String) jsonObject)));
           break;
         }
         case INET4:

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java
----------------------------------------------------------------------
diff --git 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java
 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java
index cd31ada..d6faf2d 100644
--- 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java
+++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java
@@ -20,6 +20,7 @@ package org.apache.tajo.storage.json;
 
 
 import net.minidev.json.JSONObject;
+import org.apache.commons.net.util.Base64;
 import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.catalog.SchemaUtil;
 import org.apache.tajo.catalog.TableMeta;
@@ -106,7 +107,7 @@ public class JsonLineSerializer extends TextLineSerializer {
       case BINARY:
       case BLOB:
       case VARBINARY:
-        jsonObject.put(fieldName, input.getBytes(i));
+        jsonObject.put(fieldName,  
Base64.encodeBase64String(input.getBytes(i)));
         break;
 
       case NULL_TYPE:

http://git-wip-us.apache.org/repos/asf/tajo/blob/d160f6e1/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java
----------------------------------------------------------------------
diff --git 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java
 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java
index ae7565d..e637c7f 100644
--- 
a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java
+++ 
b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextFieldSerializerDeserializer.java
@@ -26,11 +26,11 @@ import org.apache.tajo.TajoConstants;
 import org.apache.tajo.catalog.Column;
 import org.apache.tajo.catalog.TableMeta;
 import org.apache.tajo.common.TajoDataTypes;
-import org.apache.tajo.conf.TajoConf;
 import org.apache.tajo.datum.*;
 import org.apache.tajo.datum.protobuf.ProtobufJsonFormat;
 import org.apache.tajo.storage.FieldSerializerDeserializer;
 import org.apache.tajo.storage.StorageConstants;
+import org.apache.tajo.util.Bytes;
 import org.apache.tajo.util.NumberUtil;
 
 import java.io.IOException;
@@ -39,8 +39,8 @@ import java.nio.charset.CharsetDecoder;
 import java.util.TimeZone;
 
 public class TextFieldSerializerDeserializer implements 
FieldSerializerDeserializer {
-  public static final byte[] trueBytes = "true".getBytes();
-  public static final byte[] falseBytes = "false".getBytes();
+  private static final byte[] trueBytes = "true".getBytes(Bytes.UTF8_CHARSET);
+  private static final byte[] falseBytes = 
"false".getBytes(Bytes.UTF8_CHARSET);
   private static ProtobufJsonFormat protobufJsonFormat = 
ProtobufJsonFormat.getInstance();
   private final CharsetDecoder decoder = 
CharsetUtil.getDecoder(CharsetUtil.UTF_8);
 
@@ -108,7 +108,7 @@ public class TextFieldSerializerDeserializer implements 
FieldSerializerDeseriali
         break;
       case TIME:
         if (hasTimezone) {
-          bytes = ((TimeDatum) datum).asChars(timezone, true).getBytes();
+          bytes = ((TimeDatum) datum).asChars(timezone, 
true).getBytes(Bytes.UTF8_CHARSET);
         } else {
           bytes = datum.asTextBytes();
         }
@@ -117,7 +117,7 @@ public class TextFieldSerializerDeserializer implements 
FieldSerializerDeseriali
         break;
       case TIMESTAMP:
         if (hasTimezone) {
-          bytes = ((TimestampDatum) datum).asChars(timezone, true).getBytes();
+          bytes = ((TimestampDatum) datum).asChars(timezone, 
true).getBytes(Bytes.UTF8_CHARSET);
         } else {
           bytes = datum.asTextBytes();
         }
@@ -132,7 +132,7 @@ public class TextFieldSerializerDeserializer implements 
FieldSerializerDeseriali
         break;
       case PROTOBUF:
         ProtobufDatum protobuf = (ProtobufDatum) datum;
-        byte[] protoBytes = 
protobufJsonFormat.printToString(protobuf.get()).getBytes();
+        byte[] protoBytes = 
protobufJsonFormat.printToString(protobuf.get()).getBytes(Bytes.UTF8_CHARSET);
         length = protoBytes.length;
         out.write(protoBytes, 0, protoBytes.length);
         break;

Reply via email to