Repository: orc Updated Branches: refs/heads/master 7dfe4a748 -> 3f23d507c
ORC-321. Add pretty print option to the JSON schema finder tool. Fixes #230 Signed-off-by: Owen O'Malley <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/orc/repo Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/3f23d507 Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/3f23d507 Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/3f23d507 Branch: refs/heads/master Commit: 3f23d507c39be3503e73f516a451f47e0c4ee25e Parents: 7dfe4a7 Author: Owen O'Malley <[email protected]> Authored: Mon Mar 12 13:36:48 2018 -0700 Committer: Owen O'Malley <[email protected]> Committed: Tue Mar 20 14:58:48 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/orc/TypeDescription.java | 4 +- .../apache/orc/TypeDescriptionPrettyPrint.java | 131 +++++++++++++++++++ .../apache/orc/tools/json/JsonSchemaFinder.java | 5 + 3 files changed, 138 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/orc/blob/3f23d507/java/core/src/java/org/apache/orc/TypeDescription.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java index 86d88ff..d7e81cd 100644 --- a/java/core/src/java/org/apache/orc/TypeDescription.java +++ b/java/core/src/java/org/apache/orc/TypeDescription.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -46,7 +46,7 @@ public class TypeDescription private static final int DEFAULT_PRECISION = 38; private static final int DEFAULT_SCALE = 10; private static final int DEFAULT_LENGTH = 256; - private static final Pattern UNQUOTED_NAMES = Pattern.compile("^\\w+$"); + static final Pattern UNQUOTED_NAMES = Pattern.compile("^[a-zA-Z0-9_]+$"); @Override public int compareTo(TypeDescription other) { http://git-wip-us.apache.org/repos/asf/orc/blob/3f23d507/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java ---------------------------------------------------------------------- diff --git a/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java b/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java new file mode 100644 index 0000000..0714224 --- /dev/null +++ b/java/core/src/java/org/apache/orc/TypeDescriptionPrettyPrint.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc; + +import java.io.PrintStream; +import java.util.List; + +/** + * A pretty printer for TypeDescription. + */ +public class TypeDescriptionPrettyPrint { + + static void pad(PrintStream output, int offset) { + for(int i=0; i < offset; ++i) { + output.print(' '); + } + } + + static void printFieldName(PrintStream output, String fieldName){ + if (TypeDescription.UNQUOTED_NAMES.matcher(fieldName).matches()) { + output.print(fieldName); + } else { + output.print('`'); + output.print(fieldName.replaceAll("`", "``")); + output.print('`'); + } + } + + static void printStruct(PrintStream output, + int offset, + TypeDescription type) { + output.print("<"); + List<TypeDescription> children = type.getChildren(); + List<String> fields = type.getFieldNames(); + for(int c = 0; c < children.size(); ++c) { + if (c == 0) { + output.println(); + } else { + output.println(","); + } + pad(output, offset + 2); + printFieldName(output, fields.get(c)); + output.print(':'); + printType(output, offset + 2, children.get(c)); + } + output.print('>'); + } + + static void printComplex(PrintStream output, + int offset, + TypeDescription type) { + output.print("<"); + List<TypeDescription> children = type.getChildren(); + for(int c = 0; c < children.size(); ++c) { + if (c != 0) { + output.print(","); + } + printType(output, offset + 2, children.get(c)); + } + output.print('>'); + } + + static void printType(PrintStream output, + int offset, + TypeDescription type) { + output.print(type.getCategory().getName()); + switch (type.getCategory()) { + case BOOLEAN: + case BINARY: + case BYTE: + case DATE: + case DOUBLE: + case FLOAT: + case INT: + case LONG: + case SHORT: + case STRING: + case TIMESTAMP: + break; + + case DECIMAL: + output.print('('); + output.print(type.getPrecision()); + output.print(','); + output.print(type.getScale()); + output.print(')'); + break; + + case CHAR: + case VARCHAR: + output.print('('); + output.print(type.getMaxLength()); + output.print(')'); + break; + + case STRUCT: + printStruct(output, offset, type); + break; + + case LIST: + case MAP: + case UNION: + printComplex(output, offset, type); + break; + + default: + throw new IllegalArgumentException("Unhandled type " + type); + } + } + + public static void print(PrintStream output, + TypeDescription schema) { + printType(output, 0, schema); + } +} http://git-wip-us.apache.org/repos/asf/orc/blob/3f23d507/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java ---------------------------------------------------------------------- diff --git a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java index de36254..8b53ee1 100644 --- a/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java +++ b/java/tools/src/java/org/apache/orc/tools/json/JsonSchemaFinder.java @@ -31,6 +31,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.orc.TypeDescription; +import org.apache.orc.TypeDescriptionPrettyPrint; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -335,6 +336,8 @@ public class JsonSchemaFinder { result.mergedType.printFlat(System.out, "root"); } else if (cli.hasOption('t')) { printAsTable(System.out, (StructType) result.mergedType); + } else if (cli.hasOption('p')) { + TypeDescriptionPrettyPrint.print(System.out, result.getSchema()); } else { System.out.println(result.getSchema()); } @@ -349,6 +352,8 @@ public class JsonSchemaFinder { .desc("Print types as flat list of types").build()); options.addOption(Option.builder("t").longOpt("table") .desc("Print types as Hive table declaration").build()); + options.addOption(Option.builder("p").longOpt("pretty") + .desc("Pretty print the schema").build()); CommandLine cli = new GnuParser().parse(options, args); if (cli.hasOption('h') || cli.getArgs().length == 0) { HelpFormatter formatter = new HelpFormatter();
