GitHub user naegelejd commented on a diff in the pull request:

    https://github.com/apache/nutch/pull/95#discussion_r64957448
  
    --- Diff: src/java/org/apache/nutch/indexer/IndexingJob.java ---
    @@ -155,43 +161,146 @@ public void index(Path crawlDb, Path linkDb, 
List<Path> segments,
                 counter.getName());
           }
           long end = System.currentTimeMillis();
    -      LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: "
    -          + TimingUtil.elapsedTime(start, end));
    +      LOG.info("Indexer: finished at {}, elapsed: {}", sdf.format(end),
    +          TimingUtil.elapsedTime(start, end));
         } finally {
           FileSystem.get(job).delete(tmp, true);
         }
       }
     
       public int run(String[] args) throws Exception {
    -    if (args.length < 2) {
    -      System.err
    -      //.println("Usage: Indexer <crawldb> [-linkdb <linkdb>] [-params 
k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] 
[-filter] [-normalize]");
    -      .println("Usage: Indexer <crawldb> [-linkdb <linkdb>] [-params 
k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit] [-deleteGone] 
[-filter] [-normalize] [-addBinaryContent] [-base64]");
    -      IndexWriters writers = new IndexWriters(getConf());
    -      System.err.println(writers.describe());
    -      return -1;
    -    }
    -
    -    final Path crawlDb = new Path(args[0]);
    -    Path linkDb = null;
    -
    -    final List<Path> segments = new ArrayList<Path>();
    -    String params = null;
    -
    -    boolean noCommit = false;
    -    boolean deleteGone = false;
    -    boolean filter = false;
    -    boolean normalize = false;
    -    boolean addBinaryContent = false;
    -    boolean base64 = false;
    +    // boolean options
    +    Option helpOpt = new Option("h", "help", false, "show this help 
message");
    +    // argument options
    +    @SuppressWarnings("static-access")
    +    Option crawldbOpt = OptionBuilder
    +    .withArgName("crawldb")
    +    .hasArg()
    +    .withDescription(
    +        "a crawldb directory to use with this tool (optional)")
    +    .create("crawldb");
    +    @SuppressWarnings("static-access")
    +    Option linkdbOpt = OptionBuilder
    +    .withArgName("linkdb")
    +    .hasArg()
    +    .withDescription(
    +        "a linkdb directory to use with this tool (optional)")
    +    .create("linkdb");
    +    @SuppressWarnings("static-access")
    +    Option paramsOpt = OptionBuilder
    +    .withArgName("params")
    +    .hasArg()
    +    .withDescription(
    +        "key value parameters to be used with this tool e.g. 
k1=v1&k2=v2... (optional)")
    +    .create("params");
    +    @SuppressWarnings("static-access")
    +    Option segOpt = OptionBuilder
    +    .withArgName("segment")
    +    .hasArgs()
    +    .withDescription("the segment(s) to use (either this or --segmentDir 
is mandatory)")
    +    .create("segment");
    +    @SuppressWarnings("static-access")
    +    Option segmentDirOpt = OptionBuilder
    +    .withArgName("segmentDir")
    +    .hasArg()
    +    .withDescription(
    +        "directory containing one or more segments to be used with this 
tool "
    +            + "(either this or --segment is mandatory)")
    +    .create("segmentDir");
    +    @SuppressWarnings("static-access")
    +    Option noCommitOpt = OptionBuilder
    +    .withArgName("noCommit")
    +    .withDescription(
    +        "do the commits once and for all the reducers in one go 
(optional)")
    --- End diff --
    
    This description is backward: the "-noCommit" option tells the Indexer 
*not* to do a final commit after the job finishes.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to