Author: cutting
Date: Wed Aug 10 15:07:43 2005
New Revision: 231338

URL: http://svn.apache.org/viewcvs?rev=231338&view=rev
Log:
Use compressed values in a few places.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java Wed Aug 10 15:07:43 2005
@@ -131,6 +131,7 @@
 
     job.setOutputDir(newLinkDb);
     job.setOutputFormat(MapFileOutputFormat.class);
+    job.setBoolean("mapred.output.compress", true);
     job.setOutputKeyClass(UTF8.class);
     job.setOutputValueClass(Inlinks.class);
 

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java Wed Aug 10 15:07:43 2005
@@ -96,7 +96,7 @@
       new MapFile.Writer(fs, text.toString(), UTF8.class, ParseText.class);
     
     final MapFile.Writer dataOut =
-      new MapFile.Writer(fs, data.toString(), UTF8.class, ParseData.class);
+      new MapFile.Writer(fs, data.toString(), UTF8.class,ParseData.class,true);
     
     final SequenceFile.Writer crawlOut =
       new SequenceFile.Writer(fs, crawl.toString(),

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java Wed Aug 10 15:07:43 2005
@@ -70,14 +70,31 @@
 
 
     /** Create the named map for keys of the named class. */
-    public Writer(NutchFileSystem nfs, String dirName, Class keyClass, Class valClass)
+    public Writer(NutchFileSystem nfs, String dirName,
+                  Class keyClass, Class valClass)
       throws IOException {
-      this(nfs, dirName, WritableComparator.get(keyClass), valClass);
+      this(nfs, dirName, WritableComparator.get(keyClass), valClass, false);
     }
 
+    /** Create the named map for keys of the named class. */
+    public Writer(NutchFileSystem nfs, String dirName,
+                  Class keyClass, Class valClass, boolean compress)
+      throws IOException {
+      this(nfs, dirName, WritableComparator.get(keyClass), valClass, compress);
+    }
+
+    /** Create the named map using the named key comparator. */
+    public Writer(NutchFileSystem nfs, String dirName,
+                  WritableComparator comparator, Class valClass)
+      throws IOException {
+      this(nfs, dirName, comparator, valClass, false);
+    }
     /** Create the named map using the named key comparator. */
-    public Writer(NutchFileSystem nfs, String dirName, WritableComparator 
-                  comparator, Class valClass) throws IOException {
+    public Writer(NutchFileSystem nfs, String dirName,
+                  WritableComparator comparator, Class valClass,
+                  boolean compress)
+      throws IOException {
+
       this.comparator = comparator;
       this.lastKey = comparator.newKey();
 
@@ -92,7 +109,8 @@
 
       Class keyClass = comparator.getKeyClass();
       this.data =
-        new SequenceFile.Writer(nfs, dataFile.getPath(), keyClass, valClass);
+        new SequenceFile.Writer(nfs, dataFile.getPath(), keyClass, valClass,
+                                compress);
       this.index =
         new SequenceFile.Writer(nfs, indexFile.getPath(),
                                 keyClass, LongWritable.class);

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java Wed Aug 10 15:07:43 2005
@@ -35,7 +35,8 @@
     final MapFile.Writer out =
       new MapFile.Writer(fs, file.toString(),
                          job.getOutputKeyClass(),
-                         job.getOutputValueClass());
+                         job.getOutputValueClass(),
+                         job.getBoolean("mapred.output.compress", false));
 
     return new RecordWriter() {
 

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java Wed Aug 10 15:07:43 2005
@@ -35,7 +35,8 @@
     final SequenceFile.Writer out =
       new SequenceFile.Writer(fs, file.toString(),
                               job.getOutputKeyClass(),
-                              job.getOutputValueClass());
+                              job.getOutputValueClass(),
+                              job.getBoolean("mapred.output.compress", false));
 
     return new RecordWriter() {
 


Reply via email to