This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d02646  ORC-699. Minor improvements to the scan tool that make the exception messages optional. (#583)
8d02646 is described below

commit 8d02646755829701fe80e53eecf0623553e821a5
Author: Owen O'Malley <[email protected]>
AuthorDate: Sat Dec 19 01:52:27 2020 +0000

    ORC-699. Minor improvements to the scan tool that make the exception messages optional. (#583)
    
    ### What changes were proposed in this pull request?
    
    - Change the stripe id to be 0 based, which seems more consistent.
    - Change the end-of-file stripe id to be the number of stripes rather than -1.
    - Limit the recovery row to the start of the next stripe.
    - Add --verbose that prints exceptions.
    - Fix the --help output to give more information.
    
    ### How was this patch tested?
    
    Tested by hand on the example files.
    
    Signed-off-by: Owen O'Malley <[email protected]>
---
 .../src/java/org/apache/orc/tools/ScanData.java    | 35 ++++++++++++++--------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/java/tools/src/java/org/apache/orc/tools/ScanData.java b/java/tools/src/java/org/apache/orc/tools/ScanData.java
index 35fcb9d..9640cd2 100644
--- a/java/tools/src/java/org/apache/orc/tools/ScanData.java
+++ b/java/tools/src/java/org/apache/orc/tools/ScanData.java
@@ -19,6 +19,7 @@ package org.apache.orc.tools;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.HelpFormatter;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.hadoop.conf.Configuration;
@@ -37,12 +38,13 @@ import java.util.List;
  */
 public class ScanData {
 
+  private static final Options OPTIONS = new Options()
+      .addOption("v", "verbose", false, "Print exceptions")
+      .addOption("s", "schema", false, "Print schema")
+      .addOption("h", "help", false, "Provide help");
 
   static CommandLine parseCommandLine(String[] args) throws ParseException {
-    Options options = new Options()
-        .addOption("s", "schema", false, "Print schema")
-        .addOption("h", "help", false, "Provide help");
-    return new DefaultParser().parse(options, args);
+    return new DefaultParser().parse(OPTIONS, args);
   }
 
   static int calculateBestVectorSize(int indexStride) {
@@ -84,15 +86,15 @@ public class ScanData {
     long firstRow = 0;
     int stripeId = 0;
     for (StripeInformation stripe: reader.getStripes()) {
-      stripeId += 1;
       long lastRow = firstRow + stripe.getNumberOfRows();
       if (firstRow <= row && row < lastRow) {
         return new LocationInfo(firstRow, lastRow, stripeId, row);
       }
       firstRow = lastRow;
+      stripeId += 1;
     }
     return new LocationInfo(reader.getNumberOfRows(),
-        reader.getNumberOfRows(), -1, row);
+        reader.getNumberOfRows(), reader.getStripes().size(), row);
   }
 
   /**
@@ -113,7 +115,8 @@ public class ScanData {
       result = current.followingRow;
     } else {
       long rowInStripe = current.row + batchSize - current.firstRow;
-      result = current.firstRow + (rowInStripe + stride - 1) / stride * stride;
+      result = Math.min(current.followingRow,
+          current.firstRow + (rowInStripe + stride - 1) / stride * stride);
     }
     return findStripeInfo(reader, result);
   }
@@ -149,10 +152,12 @@ public class ScanData {
   static void main(Configuration conf, String[] args) throws ParseException {
     CommandLine cli = parseCommandLine(args);
     if (cli.hasOption('h') || cli.getArgs().length == 0) {
-      System.err.println("usage: java -jar orc-tools-*.jar scan [--schema] 
[--help] <orc file>*");
+      new HelpFormatter().printHelp("java -jar orc-tools-*.jar scan",
+          OPTIONS);
       System.exit(1);
     } else {
-      boolean printSchema = cli.hasOption('s');
+      final boolean printSchema = cli.hasOption('s');
+      final boolean printExceptions = cli.hasOption('v');
       List<String> badFiles = new ArrayList<>();
       for (String file : cli.getArgs()) {
         try (Reader reader = FileDump.getReader(new Path(file), conf, 
badFiles)) {
@@ -182,11 +187,13 @@ public class ScanData {
                   LocationInfo recover = findRecoveryPoint(reader, current, 
batchSize);
                   System.out.println("Unable to read batch at " + current +
                       ", recovery at " + recover);
-                  e.printStackTrace();
+                  if (printExceptions) {
+                    e.printStackTrace();
+                  }
                   findBadColumns(reader, current, batchSize, 
reader.getSchema(),
                       new boolean[reader.getSchema().getMaximumId() + 1]);
-                  // There are no more rows, so break the loop
-                  if (recover.stripeId == -1) {
+                  // If we are at the end of the file, get out
+                  if (recover.row >= reader.getNumberOfRows()) {
                     break;
                   } else {
                     rows.seekToRow(recover.row);
@@ -203,7 +210,9 @@ public class ScanData {
         } catch (Exception e) {
           badFiles.add(file);
           System.err.println("Unable to open file: " + file);
-          e.printStackTrace();
+          if (printExceptions) {
+            e.printStackTrace();
+          }
         }
       }
       System.exit(badFiles.size());

Reply via email to