[
https://issues.apache.org/jira/browse/DRILL-4694?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15315251#comment-15315251
]
ASF GitHub Bot commented on DRILL-4694:
---------------------------------------
Github user amansinha100 commented on a diff in the pull request:
https://github.com/apache/drill/pull/514#discussion_r65795257
--- Diff: exec/java-exec/src/test/java/org/apache/drill/TestCTASJson.java
---
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+
+import org.apache.drill.common.util.TestTools;
+import org.apache.drill.exec.ExecConstants;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestCTASJson extends PlanTestBase {
+ static final org.slf4j.Logger logger =
org.slf4j.LoggerFactory.getLogger(TestCTASJson.class);
+
+ static final String WORKING_PATH = TestTools.getWorkingPath();
+ static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources";
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with
fields of all types.
+ * Some records have missing fields. CTAS should skip the missing fields
+ */ public void testctas_alltypes_map() throws Exception {
+ String testName = "ctas_alltypes_map";
+ test("use dfs_test.tmp");
+ test("alter session set store.format = 'json' ");
+ test("alter session set store.json.writer.skip_null_fields = true");
// DEFAULT
+ test("create table " + testName + "_json as select * from cp.`json/" +
testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + ".json")
+ .build()
+ .run();
+
+ test("drop table " + testName + "_json" );
+ }
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with
fields of all types.
+ * Some records have missing fields. CTAS should NOT skip the missing
fields
+ */
+ public void testctas_alltypes_map_noskip() throws Exception {
+ String testName = "ctas_alltypes_map";
+ test("use dfs_test.tmp");
+ test("alter session set store.format = 'json' ");
+ test("alter session set store.json.writer.skip_null_fields = false");
// CHANGE from default
+ test("create table " + testName + "_json as select * from cp.`json/" +
testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + "_out.json")
+ .build()
+ .run();
+
+ test("drop table " + testName + "_json" );
+ }
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with
fields of all types.
+ * Some records have missing fields. CTAS should skip the missing fields
+ */ public void testctas_alltypes_repeatedmap() throws Exception {
+ String testName = "ctas_alltypes_repeated_map";
+ test("use dfs_test.tmp");
+ test("alter session set store.format = 'json' ");
+ test("alter session set store.json.writer.skip_null_fields = true");
// DEFAULT
+ test("create table " + testName + "_json as select * from cp.`json/" +
testName + ".json`");
+
+ final String query = "select * from `" + testName + "_json` t1 ";
+
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .jsonBaselineFile("json/" + testName + ".json")
+ .build()
+ .run();
+
+ test("drop table " + testName + "_json" );
+
+ }
+
+ @Test
+ /**
+ * Test a source json file that contains records that are maps with
fields of all types.
+ * Some records have missing fields. CTAS should NOT skip the missing
fields
+ */
+ public void testctas_alltypes_repeated_map_noskip() throws Exception {
+ String testName = "ctas_alltypes_repeated_map";
+ test("use dfs_test.tmp");
+ test("alter session set store.format = 'json' ");
+ test("alter session set store.json.writer.skip_null_fields = false");
// CHANGE from default
--- End diff --
The testBuilder provides the optionSettingQueriesForTestQuery() method to
set the options per query. Would be good to use it and have a finally block {}
that resets the option.
> CTAS in JSON format produces extraneous NULL fields
> ---------------------------------------------------
>
> Key: DRILL-4694
> URL: https://issues.apache.org/jira/browse/DRILL-4694
> Project: Apache Drill
> Issue Type: Bug
> Components: Storage - JSON
> Affects Versions: 1.6.0
> Reporter: Aman Sinha
> Assignee: Aman Sinha
>
> Consider the following JSON file:
> {noformat}
> // file t2.json
> {
> "X" : {
> "key1" : "value1",
> "key2" : "value2"
> }
> }
> {
> "X" : {
> "key3" : "value3",
> "key4" : "value4"
> }
> }
> {
> "X" : {
> "key5" : "value5",
> "key6" : "value6"
> }
> }
> {noformat}
> Now create a table in Json format using CTAS:
> {noformat}
> 0: jdbc:drill:zk=local> alter session set `store.format` = 'json';
> 0: jdbc:drill:zk=local> create table dfs.tmp.jt12 as select t.`X` from
> `t2.json` t;
> +-----------+----------------------------+
> | Fragment | Number of records written |
> +-----------+----------------------------+
> | 0_0 | 3 |
> +-----------+----------------------------+
> {noformat}
> The output file has rows with union schema of all the fields in all the
> records. This creates extraneous Null fields in the output:
> {noformat}
> $ cat jt12/0_0_0.json
> {
> "X" : {
> "key1" : "value1",
> "key2" : "value2",
> "key3" : null,
> "key4" : null,
> "key5" : null,
> "key6" : null
> }
> } {
> "X" : {
> "key1" : null,
> "key2" : null,
> "key3" : "value3",
> "key4" : "value4",
> "key5" : null,
> "key6" : null
> }
> } {
> "X" : {
> "key1" : null,
> "key2" : null,
> "key3" : null,
> "key4" : null,
> "key5" : "value5",
> "key6" : "value6"
> }
> }
> {noformat}
> Note that if I change the output format to CSV or Parquet, there are no Null
> fields created in the output file. The expectation for a CTAS in json
> format is that the output should match that of the input json data.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)