Author: gsingers
Date: Tue Jun  9 19:46:14 2009
New Revision: 783116

URL: http://svn.apache.org/viewvc?rev=783116&view=rev
Log:
Add the ability to limit output to a maximum number of chunks (new optional numChunks / -n option)

Modified:
    
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java

Modified: 
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java?rev=783116&r1=783115&r2=783116&view=diff
==============================================================================
--- 
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
 (original)
+++ 
lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/bayes/WikipediaXmlSplitter.java
 Tue Jun  9 19:46:14 2009
@@ -57,7 +57,11 @@
     Option chunkSizeOpt = 
obuilder.withLongName("chunkSize").withRequired(true).withArgument(
             
abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()).
             withDescription("The Size of the chunk, in 
megabytes").withShortName("c").create();
-    Group group = 
gbuilder.withName("Options").withOption(dumpFileOpt).withOption(outputDirOpt).withOption(chunkSizeOpt).create();
+    Option numChunksOpt = 
obuilder.withLongName("numChunks").withRequired(false).withArgument(
+            
abuilder.withName("numChunks").withMinimum(1).withMaximum(1).create()).
+            withDescription("The maximum number of chunks to create.  If 
specified, program will only create a subset of the 
chunks").withShortName("n").create();
+    Group group = 
gbuilder.withName("Options").withOption(dumpFileOpt).withOption(outputDirOpt).withOption(chunkSizeOpt).withOption(numChunksOpt).create();
+
     Parser parser = new Parser();
     parser.setGroup(group);
     CommandLine cmdLine = parser.parse(args);
@@ -66,6 +70,11 @@
     String outputDirPath = (String) cmdLine.getValue(outputDirOpt);
     int chunkSize = 1024 * 1024 * Integer.parseInt((String) 
cmdLine.getValue(chunkSizeOpt));
 
+    int numChunks = Integer.MAX_VALUE;
+    if (cmdLine.hasOption(numChunksOpt)){
+      numChunks = Integer.parseInt((String) cmdLine.getValue(numChunksOpt));
+    }
+
     BufferedReader dumpReader = new BufferedReader(new InputStreamReader(
         new FileInputStream(dumpFilePath), "UTF-8"));
 
@@ -128,7 +137,9 @@
 
           chunkWriter.write(content.toString(), 0, content.length());
           chunkWriter.close();
-
+          if (filenumber >= numChunks){
+            break;
+          }
           content = new StringBuilder();
           content.append(header);
         }


Reply via email to