Repository: parquet-mr Updated Branches: refs/heads/master 5c85b8dda -> ea402becc
PARQUET-668 - Provide option to disable auto crop feature in dump https://issues.apache.org/jira/browse/PARQUET-668 1. Added option `--disable-crop` 2. Updated `README.md` to reflect changes Author: djhworld <[email protected]> Closes #358 from djhworld/master and squashes the following commits: 493c3d0 [djhworld] PARQUET-668: Removed usage instructions from README, replaced with --help flag 696a5e6 [djhworld] PARQUET-668 -> Updated README.md to fix issue in usage string 6cbf59b [djhworld] PARQUET-668 - Provide option to disable auto crop feature in DumpCommand output Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/ea402bec Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/ea402bec Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/ea402bec Branch: refs/heads/master Commit: ea402becca436dc1a8e47ac9385a3db475b49355 Parents: 5c85b8d Author: djhworld <[email protected]> Authored: Wed Aug 3 14:14:26 2016 -0700 Committer: Julien Le Dem <[email protected]> Committed: Wed Aug 3 14:14:26 2016 -0700 ---------------------------------------------------------------------- parquet-tools/README.md | 47 +++----------------- .../parquet/tools/command/DumpCommand.java | 31 +++++++++---- 2 files changed, 27 insertions(+), 51 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ea402bec/parquet-tools/README.md ---------------------------------------------------------------------- diff --git a/parquet-tools/README.md b/parquet-tools/README.md index d60e1b4..49506f3 100644 --- a/parquet-tools/README.md +++ b/parquet-tools/README.md @@ -61,50 +61,13 @@ java jar ./parquet-tools-<VERSION>.jar <command> my_parquet_file.lzo.parquet ## Commands Usage -To run it on hadoop, you should use "hadoop jar" instead of "java jar" +To see usage instructions for all commands: -```sh -usage: java -jar ./parquet-tools-<VERSION>.jar cat [option...] <input> -where option is one of: - --debug Disable color output even if supported - -h,--help Show this help string - --no-color Disable color output even if supported -where <input> is the parquet file to print to stdout - -usage: java -jar ./parquet-tools-<VERSION>.jar head [option...] <input> -where option is one of: - --debug Disable color output even if supported - -h,--help Show this help string - -n,--records <arg> The number of records to show (default: 5) - --no-color Disable color output even if supported -where <input> is the parquet file to print to stdout - -usage: java -jar ./parquet-tools-<VERSION>.jar schema [option...] <input> -where option is one of: - -d,--detailed <arg> Show detailed information about the schema. - --debug Disable color output even if supported - -h,--help Show this help string - --no-color Disable color output even if supported -where <input> is the parquet file containing the schema to show - -usage: java -jar ./parquet-tools-<VERSION>.jar meta [option...] <input> -where option is one of: - --debug Disable color output even if supported - -h,--help Show this help string - --no-color Disable color output even if supported -where <input> is the parquet file to print to stdout - -usage: java -jar dump [option...] <input> -where option is one of: - -c,--column <arg> Dump only the given column, can be specified more than - once - -d,--disable-data Do not dump column data - --debug Disable color output even if supported - -h,--help Show this help string - -m,--disable-meta Do not dump row group and page metadata - --no-color Disable color output even if supported -where <input> is the parquet file to print to stdout ``` +java jar ./parquet-tools-<VERSION>.jar --help +``` + +**Note:** To run it on hadoop, you should use `hadoop jar` instead of `java jar` ## Meta Legend http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/ea402bec/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java ---------------------------------------------------------------------- diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java index 6d5e106..c4ed407 100644 --- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java +++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java @@ -85,6 +85,10 @@ public class DumpCommand extends ArgsOnlyCommand { .withDescription("Do not dump column data") .create('d'); + Option nocrop = OptionBuilder.withLongOpt("disable-crop") + .withDescription("Do not crop the output based on console width") + .create('n'); + Option cl = OptionBuilder.withLongOpt("column") .withDescription("Dump only the given column, can be specified more than once") .hasArgs() @@ -92,6 +96,7 @@ public class DumpCommand extends ArgsOnlyCommand { OPTIONS.addOption(md); OPTIONS.addOption(dt); + OPTIONS.addOption(nocrop); OPTIONS.addOption(cl); } @@ -122,17 +127,9 @@ public class DumpCommand extends ArgsOnlyCommand { ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER); MessageType schema = metaData.getFileMetaData().getSchema(); - PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter() - .withAutoColumn() - .withAutoCrop() - .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES) - .withColumnPadding(1) - .withMaxBufferedLines(1000000) - .withFlushOnTab() - .build(); - boolean showmd = !options.hasOption('m'); boolean showdt = !options.hasOption('d'); + boolean cropoutput = !options.hasOption('n'); Set<String> showColumns = null; if (options.hasOption('c')) { @@ -140,6 +137,7 @@ public class DumpCommand extends ArgsOnlyCommand { showColumns = new HashSet<String>(Arrays.asList(cols)); } + PrettyPrintWriter out = prettyPrintWriter(cropoutput); dump(out, metaData, schema, inpath, showmd, showdt, showColumns); } @@ -346,6 +344,21 @@ public class DumpCommand extends ArgsOnlyCommand { return new BigInteger(data); } + private static PrettyPrintWriter prettyPrintWriter(boolean cropOutput) { + PrettyPrintWriter.Builder builder = PrettyPrintWriter.stdoutPrettyPrinter() + .withAutoColumn() + .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES) + .withColumnPadding(1) + .withMaxBufferedLines(1000000) + .withFlushOnTab(); + + if (cropOutput) { + builder.withAutoCrop(); + } + + return builder.build(); + } + private static final class DumpGroupConverter extends GroupConverter { @Override public void start() { } @Override public void end() { }
