TAJO-1723: INSERT INTO statement should allow nested fields as target columns.
Closes #668 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/a94936ae Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/a94936ae Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/a94936ae Branch: refs/heads/master Commit: a94936ae569983bb89b4a5270d0203d7b5e591c2 Parents: 89892d0 Author: Hyunsik Choi <[email protected]> Authored: Tue Aug 4 18:05:40 2015 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Tue Aug 4 18:05:40 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 + .../java/org/apache/tajo/algebra/Insert.java | 6 +- .../org/apache/tajo/catalog/NestedPathUtil.java | 25 +--- .../org/apache/tajo/catalog/SchemaUtil.java | 40 ++++- .../org/apache/tajo/engine/parser/SQLParser.g4 | 2 +- .../apache/tajo/engine/parser/SQLAnalyzer.java | 8 +- .../engine/query/TestSelectNestedRecord.java | 40 ++++- .../TestSelectNestedRecord/sample0_ddl.sql | 1 + .../TestSelectNestedRecord/testSelect0.sql | 1 + .../testInsertType1.result | 5 + .../testInsertType2.result | 5 + .../TestSelectNestedRecord/testSelect0.result | 5 + .../org/apache/tajo/plan/LogicalPlanner.java | 16 +- .../tajo/storage/json/JsonLineDeserializer.java | 39 +---- .../tajo/storage/json/JsonLineSerializer.java | 148 ++++++++++--------- 15 files changed, 208 insertions(+), 136 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 129c7de..0d35d8a 100644 --- a/CHANGES +++ b/CHANGES @@ -457,6 +457,9 @@ Release 0.11.0 - unreleased SUB TASKS + TAJO-1723: INSERT INTO statement should allow nested fields as + target columns. (hyunsik) + TAJO-1302: Support index metadata backup and restore. (jihoon) TAJO-1484 Apply on ColPartitionStoreExec. (Contributed by Navis, http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-algebra/src/main/java/org/apache/tajo/algebra/Insert.java ---------------------------------------------------------------------- diff --git a/tajo-algebra/src/main/java/org/apache/tajo/algebra/Insert.java b/tajo-algebra/src/main/java/org/apache/tajo/algebra/Insert.java index f0cd5f9..299a480 100644 --- a/tajo-algebra/src/main/java/org/apache/tajo/algebra/Insert.java +++ b/tajo-algebra/src/main/java/org/apache/tajo/algebra/Insert.java @@ -32,7 +32,7 @@ public class Insert extends Expr { @Expose @SerializedName("TableName") private String tableName; @Expose @SerializedName("TargetColumns") - private String [] targetColumns; + private ColumnReferenceExpr [] targetColumns; @Expose @SerializedName("StorageType") private String storageType; @Expose @SerializedName("Location") @@ -70,11 +70,11 @@ public class Insert extends Expr { return targetColumns != null; } - public String [] getTargetColumns() { + public ColumnReferenceExpr [] getTargetColumns() { return targetColumns; } - public void setTargetColumns(String [] targets) { + public void setTargetColumns(ColumnReferenceExpr [] targets) { this.targetColumns = targets; } http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/NestedPathUtil.java ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/NestedPathUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/NestedPathUtil.java index 58b4f26..dec9a55 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/NestedPathUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/NestedPathUtil.java @@ -19,11 +19,11 @@ package org.apache.tajo.catalog; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import org.apache.tajo.common.TajoDataTypes.Type; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.*; /** * Utility methods for nested field @@ -65,24 +65,6 @@ public class NestedPathUtil { return sb.toString(); } - /** - * Lookup the actual column corresponding to a given path. - * We assume that a path starts with the slash '/' and it - * does not include the root field. - * - * @param nestedField Nested column - * @param path Path which starts with '/'; - * @return Column corresponding to the path - */ - public static Column lookupPath(Column nestedField, String path) { - Preconditions.checkArgument(path.charAt(0) == PATH_DELIMITER.charAt(0), - "A nested field path must start with slash '/'."); - - // We assume that path starts with '/', causing an empty string "" at 0 in the path splits. - // So, we should start the index from 1 instead of 0. - return lookupPath(nestedField, path.split(PATH_DELIMITER)); - } - public static Column lookupPath(Column nestedField, String [] paths) { // We assume that path starts with '/', causing an empty string "" at 0 in the path splits. // So, we should start the index from 1 instead of 0. @@ -106,4 +88,5 @@ public class NestedPathUtil { throw new NoSuchFieldError(makePath(paths)); } } + } http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java ---------------------------------------------------------------------- diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java index 44973db..09a2e45 100644 --- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java +++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/SchemaUtil.java @@ -18,8 +18,11 @@ package org.apache.tajo.catalog; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import org.apache.tajo.util.TUtil; +import java.util.HashMap; import java.util.List; import static org.apache.tajo.common.TajoDataTypes.DataType; @@ -113,11 +116,44 @@ public class SchemaUtil { return names; } + public static String [] convertColumnsToPaths(Iterable<Column> columns, boolean onlyLeaves) { + List<String> paths = Lists.newArrayList(); + + for (Column c : columns) { + if (onlyLeaves && c.getDataType().getType() == Type.RECORD) { + continue; + } + paths.add(c.getSimpleName()); + } + + return paths.toArray(new String [paths.size()]); + } + + public static ImmutableMap<String, Type> buildTypeMap(Iterable<Column> schema, String [] targetPaths) { + + HashMap<String, Type> builder = new HashMap<String, Type>(); + for (Column column : schema) { + + // Keep types which only belong to projected paths + // For example, assume that a projected path is 'name/first_name', where name is RECORD and first_name is TEXT. + // In this case, we should keep two types: + // * name - RECORD + // * name/first_name TEXT + for (String p : targetPaths) { + if (p.startsWith(column.getSimpleName())) { + builder.put(column.getSimpleName(), column.getDataType().getType()); + } + } + } + + return ImmutableMap.copyOf(builder); + } + /** * Column visitor interface */ - public static interface ColumnVisitor { - public void visit(int depth, List<String> path, Column column); + public interface ColumnVisitor { + void visit(int depth, List<String> path, Column column); } /** http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 index 149269e..41de218 100644 --- a/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 +++ b/tajo-core/src/main/antlr4/org/apache/tajo/engine/parser/SQLParser.g4 @@ -1599,7 +1599,7 @@ null_ordering */ insert_statement - : INSERT (OVERWRITE)? INTO table_name (LEFT_PAREN column_name_list RIGHT_PAREN)? query_expression + : INSERT (OVERWRITE)? INTO table_name (LEFT_PAREN column_reference_list RIGHT_PAREN)? query_expression | INSERT (OVERWRITE)? INTO LOCATION path=Character_String_Literal (USING storage_type=identifier (param_clause)?)? query_expression ; http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java b/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java index 465aaa8..51fe819 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/parser/SQLAnalyzer.java @@ -1627,10 +1627,12 @@ public class SQLAnalyzer extends SQLParserBaseVisitor<Expr> { if (ctx.table_name() != null) { insertExpr.setTableName(ctx.table_name().getText()); - if (ctx.column_name_list() != null) { - String[] targetColumns = new String[ctx.column_name_list().identifier().size()]; + if (ctx.column_reference_list() != null) { + ColumnReferenceExpr [] targetColumns = + new ColumnReferenceExpr[ctx.column_reference_list().column_reference().size()]; + for (int i = 0; i < targetColumns.length; i++) { - targetColumns[i] = ctx.column_name_list().identifier().get(i).getText(); + targetColumns[i] = visitColumn_reference(ctx.column_reference_list().column_reference(i)); } insertExpr.setTargetColumns(targetColumns); http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectNestedRecord.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectNestedRecord.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectNestedRecord.java index 9f8a5fd..ffd6f08 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectNestedRecord.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestSelectNestedRecord.java @@ -30,7 +30,7 @@ import static org.junit.Assert.assertEquals; public class TestSelectNestedRecord extends QueryTestCaseBase { @Test - public final void testSelect1() throws Exception { + public final void testSelect0() throws Exception { List<String> tables = executeDDL("sample1_ddl.sql", "sample1", "sample1"); assertEquals(TUtil.newList("sample1"), tables); @@ -40,6 +40,16 @@ public class TestSelectNestedRecord extends QueryTestCaseBase { } @Test + public final void testSelect1() throws Exception { + List<String> tables = executeDDL("sample1_ddl.sql", "sample1", "sample2"); + assertEquals(TUtil.newList("sample2"), tables); + + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + + @Test public final void testSelect2() throws Exception { List<String> tables = executeDDL("tweets_ddl.sql", "tweets", "tweets"); assertEquals(TUtil.newList("tweets"), tables); @@ -68,4 +78,32 @@ public class TestSelectNestedRecord extends QueryTestCaseBase { assertResultSet(res); cleanupQuery(res); } + + @Test + public final void testInsertType1() throws Exception { + // all columns + List<String> tables = executeDDL("sample1_ddl.sql", "sample1", "sample3"); + assertEquals(TUtil.newList("sample3"), tables); + + executeString("CREATE TABLE clone (title TEXT, name RECORD (first_name TEXT, last_name TEXT)) USING JSON;").close(); + + executeString("INSERT INTO clone (title, name.first_name, name.last_name) SELECT title, name.first_name, name.last_name from sample3").close(); + ResultSet res = executeString("select title, name.first_name, name.last_name from clone"); + assertResultSet(res); + res.close(); + } + + @Test + public final void testInsertType2() throws Exception { + // some columns + List<String> tables = executeDDL("sample1_ddl.sql", "sample1", "sample4"); + assertEquals(TUtil.newList("sample4"), tables); + + executeString("CREATE TABLE clone2 (title TEXT, name RECORD (first_name TEXT, last_name TEXT)) USING JSON;").close(); + + executeString("INSERT INTO clone2 (title, name.last_name) SELECT title, name.last_name from sample4").close(); + ResultSet res = executeString("select title, name.first_name, name.last_name from clone2"); + assertResultSet(res); + res.close(); + } } http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/test/resources/queries/TestSelectNestedRecord/sample0_ddl.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestSelectNestedRecord/sample0_ddl.sql b/tajo-core/src/test/resources/queries/TestSelectNestedRecord/sample0_ddl.sql new file mode 100644 index 0000000..ed6aee1 --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSelectNestedRecord/sample0_ddl.sql @@ -0,0 +1 @@ +CREATE TABLE clone (title TEXT, name RECORD (first_name TEXT, last_name TEXT)) USING JSON; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/test/resources/queries/TestSelectNestedRecord/testSelect0.sql ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/queries/TestSelectNestedRecord/testSelect0.sql b/tajo-core/src/test/resources/queries/TestSelectNestedRecord/testSelect0.sql new file mode 100644 index 0000000..a594bcf --- /dev/null +++ b/tajo-core/src/test/resources/queries/TestSelectNestedRecord/testSelect0.sql @@ -0,0 +1 @@ +SELECT title, name.first_name, name.last_name FROM sample1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType1.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType1.result b/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType1.result new file mode 100644 index 0000000..ec49cd5 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType1.result @@ -0,0 +1,5 @@ +title,name/first_name,name/last_name +------------------------------- +Hand of the King,Eddard,Stark +Assassin,Arya,Stark +Dancing Master,Syrio,Forel \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType2.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType2.result b/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType2.result new file mode 100644 index 0000000..a80edfe --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSelectNestedRecord/testInsertType2.result @@ -0,0 +1,5 @@ +title,name/first_name,name/last_name +------------------------------- +Hand of the King,null,Stark +Assassin,null,Stark +Dancing Master,null,Forel \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-core/src/test/resources/results/TestSelectNestedRecord/testSelect0.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestSelectNestedRecord/testSelect0.result b/tajo-core/src/test/resources/results/TestSelectNestedRecord/testSelect0.result new file mode 100644 index 0000000..ec49cd5 --- /dev/null +++ b/tajo-core/src/test/resources/results/TestSelectNestedRecord/testSelect0.result @@ -0,0 +1,5 @@ +title,name/first_name,name/last_name +------------------------------- +Hand of the King,Eddard,Stark +Assassin,Arya,Stark +Dancing Master,Syrio,Forel \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index c0727bb..b1d3e2e 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -56,7 +56,6 @@ import org.apache.tajo.plan.nameresolver.NameResolvingMode; import org.apache.tajo.plan.rewrite.rules.ProjectionPushDownRule; import org.apache.tajo.plan.util.ExprFinder; import org.apache.tajo.plan.util.PlannerUtil; -import org.apache.tajo.plan.verifier.VerifyException; import org.apache.tajo.storage.StorageService; import org.apache.tajo.util.KeyValueSet; import org.apache.tajo.util.Pair; @@ -1656,10 +1655,10 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex // See PreLogicalPlanVerifier.visitInsert. // It guarantees that the equivalence between the numbers of target and projected columns. - String [] targets = expr.getTargetColumns(); + ColumnReferenceExpr [] targets = expr.getTargetColumns(); Schema targetColumns = new Schema(); for (int i = 0; i < targets.length; i++) { - Column targetColumn = desc.getLogicalSchema().getColumn(targets[i]); + Column targetColumn = desc.getLogicalSchema().getColumn(targets[i].getCanonicalName().replace(".", "/")); if (targetColumn == null) { throw makeSyntaxError("column '" + targets[i] + "' of relation '" + desc.getName() + "' does not exist"); @@ -1702,15 +1701,20 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex } if (child instanceof Projectable) { - Projectable projectionNode = (Projectable)insertNode.getChild(); + Projectable projectionNode = insertNode.getChild(); // Modifying projected columns by adding NULL constants // It is because that table appender does not support target columns to be written. List<Target> targets = TUtil.newList(); - for (int i = 0; i < tableSchema.size(); i++) { - Column column = tableSchema.getColumn(i); + for (Column column : tableSchema.getAllColumns()) { int idxInProjectionNode = targetColumns.getIndex(column); + + // record type itself cannot be projected yet. + if (column.getDataType().getType() == TajoDataTypes.Type.RECORD) { + continue; + } + if (idxInProjectionNode >= 0 && idxInProjectionNode < projectionNode.getTargets().length) { targets.add(projectionNode.getTargets()[idxInProjectionNode]); } else { http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java index 73b7592..7ef483c 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java @@ -19,6 +19,7 @@ package org.apache.tajo.storage.json; +import com.facebook.presto.hive.shaded.com.google.common.collect.Lists; import io.netty.buffer.ByteBuf; import net.minidev.json.JSONObject; import net.minidev.json.parser.JSONParser; @@ -35,57 +36,29 @@ import org.apache.tajo.exception.UnimplementedException; import org.apache.tajo.storage.Tuple; import org.apache.tajo.storage.text.TextLineDeserializer; import org.apache.tajo.storage.text.TextLineParsingError; -import org.apache.tajo.util.TUtil; import java.io.IOException; import java.util.Map; public class JsonLineDeserializer extends TextLineDeserializer { private JSONParser parser; + // Full Path -> Type - private Map<String, Type> types; - private String [] projectedPaths; + private final Map<String, Type> types; + private final String [] projectedPaths; public JsonLineDeserializer(Schema schema, TableMeta meta, Column [] projected) { super(schema, meta); - projectedPaths = new String[projected.length]; - for (int i = 0; i < projected.length; i++) { - this.projectedPaths[i] = projected[i].getSimpleName(); - } + projectedPaths = SchemaUtil.convertColumnsToPaths(Lists.newArrayList(projected), true); + types = SchemaUtil.buildTypeMap(schema.getAllColumns(), projectedPaths); } @Override public void init() { - types = TUtil.newHashMap(); - for (Column column : schema.getAllColumns()) { - - // Keep types which only belong to projected paths - // For example, assume that a projected path is 'name/first_name', where name is RECORD and first_name is TEXT. - // In this case, we should keep two types: - // * name - RECORD - // * name/first_name TEXT - for (String p :projectedPaths) { - if (p.startsWith(column.getSimpleName())) { - types.put(column.getSimpleName(), column.getDataType().getType()); - } - } - } parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR); } - private static String makePath(String [] path, int depth) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i <= depth; i++) { - sb.append(path[i]); - if (i < depth) { - sb.append(NestedPathUtil.PATH_DELIMITER); - } - } - - return sb.toString(); - } - /** * * http://git-wip-us.apache.org/repos/asf/tajo/blob/a94936ae/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java index 4d5d2e6..99f81a2 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineSerializer.java @@ -21,6 +21,7 @@ package org.apache.tajo.storage.json; import net.minidev.json.JSONObject; import org.apache.commons.net.util.Base64; +import org.apache.tajo.catalog.NestedPathUtil; import org.apache.tajo.catalog.Schema; import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.catalog.TableMeta; @@ -32,87 +33,103 @@ import org.apache.tajo.storage.text.TextLineSerializer; import java.io.IOException; import java.io.OutputStream; +import java.util.Map; public class JsonLineSerializer extends TextLineSerializer { - private Type [] types; - private String [] simpleNames; - private int columnNum; + // Full Path -> Type + private final Map<String, Type> types; + private final String [] projectedPaths; public JsonLineSerializer(Schema schema, TableMeta meta) { super(schema, meta); + + projectedPaths = SchemaUtil.convertColumnsToPaths(schema.getAllColumns(), true); + types = SchemaUtil.buildTypeMap(schema.getAllColumns(), projectedPaths); } @Override public void init() { - types = SchemaUtil.toTypes(schema); - simpleNames = SchemaUtil.toSimpleNames(schema); - columnNum = schema.size(); + } + + private void putValue(JSONObject json, + String fullPath, + String [] pathElements, + int depth, + int fieldIndex, + Tuple input) throws IOException { + String fieldName = pathElements[depth]; + + if (input.isBlankOrNull(fieldIndex)) { + return; + } + + switch (types.get(fullPath)) { + + case BOOLEAN: + json.put(fieldName, input.getBool(fieldIndex)); + break; + + case INT1: + case INT2: + json.put(fieldName, input.getInt2(fieldIndex)); + break; + + case INT4: + json.put(fieldName, input.getInt4(fieldIndex)); + break; + + case INT8: + json.put(fieldName, input.getInt8(fieldIndex)); + break; + + case FLOAT4: + json.put(fieldName, input.getFloat4(fieldIndex)); + break; + + case FLOAT8: + json.put(fieldName, input.getFloat8(fieldIndex)); + break; + + case CHAR: + case TEXT: + case VARCHAR: + case INET4: + case TIMESTAMP: + case DATE: + case TIME: + case INTERVAL: + json.put(fieldName, input.getText(fieldIndex)); + break; + + case BIT: + case BINARY: + case BLOB: + case VARBINARY: + json.put(fieldName, Base64.encodeBase64String(input.getBytes(fieldIndex))); + break; + + case NULL_TYPE: + break; + + case RECORD: + JSONObject record = json.containsKey(fieldName) ? (JSONObject) json.get(fieldName) : new JSONObject(); + json.put(fieldName, record); + putValue(record, fullPath + "/" + pathElements[depth + 1], pathElements, depth + 1, fieldIndex, input); + break; + + default: + throw new UnimplementedException(fieldName + "(" + types.get(fullPath).name() + ") is not supported."); + } } @Override public int serialize(OutputStream out, Tuple input) throws IOException { JSONObject jsonObject = new JSONObject(); - for (int i = 0; i < columnNum; i++) { - if (input.isBlankOrNull(i)) { - continue; - } - - String fieldName = simpleNames[i]; - Type type = types[i]; - - switch (type) { - - case BOOLEAN: - jsonObject.put(fieldName, input.getBool(i)); - break; - - case INT1: - case INT2: - jsonObject.put(fieldName, input.getInt2(i)); - break; - - case INT4: - jsonObject.put(fieldName, input.getInt4(i)); - break; - - case INT8: - jsonObject.put(fieldName, input.getInt8(i)); - break; - - case FLOAT4: - jsonObject.put(fieldName, input.getFloat4(i)); - break; - - case FLOAT8: - jsonObject.put(fieldName, input.getFloat8(i)); - break; - - case CHAR: - case TEXT: - case VARCHAR: - case INET4: - case TIMESTAMP: - case DATE: - case TIME: - case INTERVAL: - jsonObject.put(fieldName, input.getText(i)); - break; - - case BIT: - case BINARY: - case BLOB: - case VARBINARY: - jsonObject.put(fieldName, Base64.encodeBase64String(input.getBytes(i))); - break; - - case NULL_TYPE: - break; - - default: - throw new UnimplementedException(types[i].name() + " is not supported."); - } + for (int i = 0; i < projectedPaths.length; i++) { + String [] paths = projectedPaths[i].split(NestedPathUtil.PATH_DELIMITER); + putValue(jsonObject, paths[0], paths, 0, i, input); } String jsonStr = jsonObject.toJSONString(); @@ -123,6 +140,5 @@ public class JsonLineSerializer extends TextLineSerializer { @Override public void release() { - } }
