Author: cutting
Date: Wed Aug 10 15:07:43 2005
New Revision: 231338
URL: http://svn.apache.org/viewcvs?rev=231338&view=rev
Log:
Use compressed values in a few places.
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java Wed Aug 10 15:07:43 2005
@@ -131,6 +131,7 @@
job.setOutputDir(newLinkDb);
job.setOutputFormat(MapFileOutputFormat.class);
+ job.setBoolean("mapred.output.compress", true);
job.setOutputKeyClass(UTF8.class);
job.setOutputValueClass(Inlinks.class);
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java Wed Aug 10 15:07:43 2005
@@ -96,7 +96,7 @@
new MapFile.Writer(fs, text.toString(), UTF8.class, ParseText.class);
final MapFile.Writer dataOut =
- new MapFile.Writer(fs, data.toString(), UTF8.class, ParseData.class);
+ new MapFile.Writer(fs, data.toString(), UTF8.class,ParseData.class,true);
final SequenceFile.Writer crawlOut =
new SequenceFile.Writer(fs, crawl.toString(),
Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/io/MapFile.java Wed Aug 10 15:07:43 2005
@@ -70,14 +70,31 @@
/** Create the named map for keys of the named class. */
- public Writer(NutchFileSystem nfs, String dirName, Class keyClass, Class valClass)
+ public Writer(NutchFileSystem nfs, String dirName,
+ Class keyClass, Class valClass)
throws IOException {
- this(nfs, dirName, WritableComparator.get(keyClass), valClass);
+ this(nfs, dirName, WritableComparator.get(keyClass), valClass, false);
}
+ /** Create the named map for keys of the named class. */
+ public Writer(NutchFileSystem nfs, String dirName,
+ Class keyClass, Class valClass, boolean compress)
+ throws IOException {
+ this(nfs, dirName, WritableComparator.get(keyClass), valClass, compress);
+ }
+
+ /** Create the named map using the named key comparator. */
+ public Writer(NutchFileSystem nfs, String dirName,
+ WritableComparator comparator, Class valClass)
+ throws IOException {
+ this(nfs, dirName, comparator, valClass, false);
+ }
/** Create the named map using the named key comparator. */
- public Writer(NutchFileSystem nfs, String dirName, WritableComparator
- comparator, Class valClass) throws IOException {
+ public Writer(NutchFileSystem nfs, String dirName,
+ WritableComparator comparator, Class valClass,
+ boolean compress)
+ throws IOException {
+
this.comparator = comparator;
this.lastKey = comparator.newKey();
@@ -92,7 +109,8 @@
Class keyClass = comparator.getKeyClass();
this.data =
- new SequenceFile.Writer(nfs, dataFile.getPath(), keyClass, valClass);
+ new SequenceFile.Writer(nfs, dataFile.getPath(), keyClass, valClass,
+ compress);
this.index =
new SequenceFile.Writer(nfs, indexFile.getPath(),
keyClass, LongWritable.class);
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java Wed Aug 10 15:07:43 2005
@@ -35,7 +35,8 @@
final MapFile.Writer out =
new MapFile.Writer(fs, file.toString(),
job.getOutputKeyClass(),
- job.getOutputValueClass());
+ job.getOutputValueClass(),
+ job.getBoolean("mapred.output.compress", false));
return new RecordWriter() {
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java?rev=231338&r1=231337&r2=231338&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java Wed Aug 10 15:07:43 2005
@@ -35,7 +35,8 @@
final SequenceFile.Writer out =
new SequenceFile.Writer(fs, file.toString(),
job.getOutputKeyClass(),
- job.getOutputValueClass());
+ job.getOutputValueClass(),
+ job.getBoolean("mapred.output.compress", false));
return new RecordWriter() {