[
https://issues.apache.org/jira/browse/HAWQ-178?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15131832#comment-15131832
]
ASF GitHub Bot commented on HAWQ-178:
-------------------------------------
Github user hornn commented on a diff in the pull request:
https://github.com/apache/incubator-hawq/pull/302#discussion_r51835264
--- Diff:
pxf/pxf-json/src/test/java/org/apache/hawq/pxf/plugins/json/JsonExtensionTest.java
---
@@ -0,0 +1,288 @@
+package org.apache.hawq.pxf.plugins.json;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hawq.pxf.api.Fragmenter;
+import org.apache.hawq.pxf.api.ReadAccessor;
+import org.apache.hawq.pxf.api.ReadResolver;
+import org.apache.hawq.pxf.api.io.DataType;
+import org.apache.hawq.pxf.plugins.hdfs.HdfsDataFragmenter;
+import org.apache.hawq.pxf.plugins.json.JsonAccessor;
+import org.apache.hawq.pxf.plugins.json.JsonResolver;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class JsonExtensionTest extends PxfUnit {
+
+ private List<Pair<String, DataType>> columnDefs = null;
+ private List<Pair<String, String>> extraParams = new
ArrayList<Pair<String, String>>();
+
+ @Before
+ public void before() {
+
+ columnDefs = new ArrayList<Pair<String, DataType>>();
+
+ columnDefs.add(new Pair<String, DataType>("created_at",
DataType.TEXT));
+ columnDefs.add(new Pair<String, DataType>("id",
DataType.BIGINT));
+ columnDefs.add(new Pair<String, DataType>("text",
DataType.TEXT));
+ columnDefs.add(new Pair<String, DataType>("user.screen_name",
DataType.TEXT));
+ columnDefs.add(new Pair<String,
DataType>("entities.hashtags[0]", DataType.TEXT));
+ columnDefs.add(new Pair<String,
DataType>("coordinates.coordinates[0]", DataType.FLOAT8));
+ columnDefs.add(new Pair<String,
DataType>("coordinates.coordinates[1]", DataType.FLOAT8));
+ }
+
+ @After
+ public void cleanup() throws Exception {
+ columnDefs.clear();
+ extraParams.clear();
+ }
+
+ @Test
+ public void testCompressedMultilineJsonFile() throws Exception {
+
+ extraParams.add(new Pair<String, String>("IDENTIFIER",
"created_at"));
+
+ List<String> output = new ArrayList<String>();
+
+ output.add("Fri Jun 07 22:45:02 +0000
2013,343136547115253761,text1,SpreadButter,tweetCongress,,");
+ output.add("Fri Jun 07 22:45:02 +0000
2013,343136547123646465,text2,patronusdeadly,,,");
+ output.add("Fri Jun 07 22:45:02 +0000
2013,343136547136233472,text3,NoSecrets_Vagas,,,");
+
+ super.assertOutput(new Path(System.getProperty("user.dir") +
File.separator
+ + "src/test/resources/tweets.tar.gz"), output);
+ }
+
+ @Test
+ public void testMaxRecordLength() throws Exception {
+
+ // variable-size-objects.json contains 3 json objects but only
2 of them fit in the 27 byte length limitation
+
+ extraParams.add(new Pair<String, String>("IDENTIFIER",
"key666"));
+ extraParams.add(new Pair<String, String>("MAXLENGTH", "27"));
+
+ columnDefs.clear();
+ columnDefs.add(new Pair<String, DataType>("key666",
DataType.TEXT));
+
+ List<String> output = new ArrayList<String>();
+
+ output.add("small object1");
+ // skip the large object2 (exceeds the 27 byte MAXLENGTH limit)
+ output.add("small object3");
+
+ super.assertOutput(new Path(System.getProperty("user.dir") +
File.separator
+ +
"src/test/resources/variable-size-objects.json"), output);
+ }
+
+ @Test
+ public void testDataTypes() throws Exception {
+
+ // TODO: The BYTEA type is not tested!!! Current oneField.val =
val.asText().getBytes(); convention is
--- End diff ---
Let's open a sub-task for it, what do you think?
> Add JSON plugin support in code base
> ------------------------------------
>
> Key: HAWQ-178
> URL: https://issues.apache.org/jira/browse/HAWQ-178
> Project: Apache HAWQ
> Issue Type: New Feature
> Components: PXF
> Reporter: Goden Yao
> Assignee: Christian Tzolov
> Fix For: backlog
>
> Attachments: PXFJSONPluginforHAWQ2.0andPXF3.0.0.pdf,
> PXFJSONPluginforHAWQ2.0andPXF3.0.0v.2.pdf,
> PXFJSONPluginforHAWQ2.0andPXF3.0.0v.3.pdf
>
>
> JSON has been a popular format used in HDFS as well as in the community,
> there has been a few JSON PXF plugins developed by the community and we'd
> like to see it being incorporated into the code base as an optional package.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)