Author: sharad
Date: Tue Apr 21 07:34:00 2009
New Revision: 767055
URL: http://svn.apache.org/viewvc?rev=767055&view=rev
Log:
HADOOP-5697. Change org.apache.hadoop.examples.Grep to use new mapreduce api.
Contributed by Amareshwari Sriramadasu.
Added:
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/lib/map/RegexMapper.java
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/examples/org/apache/hadoop/examples/Grep.java
hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/lib/RegexMapper.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=767055&r1=767054&r2=767055&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue Apr 21 07:34:00 2009
@@ -243,6 +243,9 @@
HADOOP-5681. Change examples RandomWriter and RandomTextWriter to
use new mapreduce API. (Amareshwari Sriramadasu via sharad)
+ HADOOP-5697. Change org.apache.hadoop.examples.Grep to use new
+ mapreduce api. (Amareshwari Sriramadasu via sharad)
+
OPTIMIZATIONS
HADOOP-5595. NameNode does not need to run a replicator to choose a
Modified: hadoop/core/trunk/src/examples/org/apache/hadoop/examples/Grep.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/examples/org/apache/hadoop/examples/Grep.java?rev=767055&r1=767054&r2=767055&view=diff
==============================================================================
--- hadoop/core/trunk/src/examples/org/apache/hadoop/examples/Grep.java (original)
+++ hadoop/core/trunk/src/examples/org/apache/hadoop/examples/Grep.java Tue Apr 21 07:34:00 2009
@@ -25,8 +25,14 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.*;
-import org.apache.hadoop.mapred.lib.*;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.map.InverseMapper;
+import org.apache.hadoop.mapreduce.lib.map.RegexMapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
@@ -38,14 +44,19 @@
if (args.length < 3) {
System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
ToolRunner.printGenericCommandUsage(System.out);
- return -1;
+ return 2;
}
Path tempDir =
new Path("grep-temp-"+
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
- JobConf grepJob = new JobConf(getConf(), Grep.class);
+ Configuration conf = getConf();
+ conf.set("mapred.mapper.regex", args[2]);
+ if (args.length == 4)
+ conf.set("mapred.mapper.regex.group", args[3]);
+
+ Job grepJob = new Job(conf);
try {
@@ -54,37 +65,34 @@
FileInputFormat.setInputPaths(grepJob, args[0]);
grepJob.setMapperClass(RegexMapper.class);
- grepJob.set("mapred.mapper.regex", args[2]);
- if (args.length == 4)
- grepJob.set("mapred.mapper.regex.group", args[3]);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, tempDir);
- grepJob.setOutputFormat(SequenceFileOutputFormat.class);
+ grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
- JobClient.runJob(grepJob);
+ grepJob.waitForCompletion(true);
- JobConf sortJob = new JobConf(Grep.class);
+ Job sortJob = new Job(conf);
sortJob.setJobName("grep-sort");
FileInputFormat.setInputPaths(sortJob, tempDir);
- sortJob.setInputFormat(SequenceFileInputFormat.class);
+ sortJob.setInputFormatClass(SequenceFileInputFormat.class);
sortJob.setMapperClass(InverseMapper.class);
sortJob.setNumReduceTasks(1); // write a single file
FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
- sortJob.setOutputKeyComparatorClass // sort by decreasing freq
- (LongWritable.DecreasingComparator.class);
+ sortJob.setSortComparatorClass( // sort by decreasing freq
+ LongWritable.DecreasingComparator.class);
- JobClient.runJob(sortJob);
+ sortJob.waitForCompletion(true);
}
finally {
- FileSystem.get(grepJob).delete(tempDir, true);
+ FileSystem.get(conf).delete(tempDir, true);
}
return 0;
}
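For readers following the migration, the essential shape of the new-API job setup is sketched below. This is a standalone example, not part of the patch; the class name, paths, and regex values are made up for illustration.

// Minimal sketch (not part of this patch) of the new-API job setup the
// Grep example migrates to: regex settings go on the Configuration, the
// Job is built from it, and waitForCompletion() replaces JobClient.runJob().
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.RegexMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class NewApiGrepSketch {
  public static void main(String[] args) throws Exception {
    // Set the regex before constructing the Job, since Job copies the
    // Configuration it is given.
    Configuration conf = new Configuration();
    conf.set("mapred.mapper.regex", "dfs[a-z.]+");   // illustrative pattern
    conf.set("mapred.mapper.regex.group", "0");      // optional capture group

    Job job = new Job(conf);                         // new org.apache.hadoop.mapreduce API
    job.setJobName("grep-search");
    job.setMapperClass(RegexMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path("in"));
    FileOutputFormat.setOutputPath(job, new Path("out"));

    // Blocks until the job finishes; true requests progress logging.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}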
Modified: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/lib/RegexMapper.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/lib/RegexMapper.java?rev=767055&r1=767054&r2=767055&view=diff
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/lib/RegexMapper.java (original)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapred/lib/RegexMapper.java Tue Apr 21 07:34:00 2009
@@ -31,7 +31,10 @@
import org.apache.hadoop.mapred.Reporter;
-/** A {@link Mapper} that extracts text matching a regular expression. */
+/** A {@link Mapper} that extracts text matching a regular expression.
+ * @deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.RegexMapper}
+ */
+@Deprecated
public class RegexMapper<K> extends MapReduceBase
implements Mapper<K, Text, Text, LongWritable> {
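The old-API class stays usable for existing JobConf-based jobs, which now simply compile with a deprecation warning. For comparison with the new-API sketch above, a minimal old-API configuration (illustrative values, not taken from the patch) looks like this:

// Old-API (org.apache.hadoop.mapred) configuration of the now-deprecated
// RegexMapper; values are illustrative only.
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.LongSumReducer;
import org.apache.hadoop.mapred.lib.RegexMapper;

public class OldApiGrepSketch {
  public static JobConf configure() {
    JobConf job = new JobConf(OldApiGrepSketch.class);
    job.setJobName("grep-search-old-api");
    job.setMapperClass(RegexMapper.class);           // now carries @Deprecated
    job.set("mapred.mapper.regex", "dfs[a-z.]+");    // set directly on the JobConf
    job.set("mapred.mapper.regex.group", "0");
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    return job;
  }
}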
Added: hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/lib/map/RegexMapper.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/lib/map/RegexMapper.java?rev=767055&view=auto
==============================================================================
--- hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/lib/map/RegexMapper.java (added)
+++ hadoop/core/trunk/src/mapred/org/apache/hadoop/mapreduce/lib/map/RegexMapper.java Tue Apr 21 07:34:00 2009
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.map;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+
+/** A {@link Mapper} that extracts text matching a regular expression. */
+public class RegexMapper<K> extends Mapper<K, Text, Text, LongWritable> {
+
+ private Pattern pattern;
+ private int group;
+
+ public void setup(Context context) {
+ Configuration conf = context.getConfiguration();
+ pattern = Pattern.compile(conf.get("mapred.mapper.regex"));
+ group = conf.getInt("mapred.mapper.regex.group", 0);
+ }
+
+ public void map(K key, Text value,
+ Context context)
+ throws IOException, InterruptedException {
+ String text = value.toString();
+ Matcher matcher = pattern.matcher(text);
+ while (matcher.find()) {
+ context.write(new Text(matcher.group(group)), new LongWritable(1));
+ }
+ }
+}
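To make the new mapper's behaviour concrete, here is a small plain-JDK sketch (no MapReduce dependencies; the regex, group, and input record are made-up values) mirroring the loop in map(): one (match, 1) pair per occurrence of the pattern, keyed by the configured capture group (group 0, the whole match, by default).

// Illustrates the output of RegexMapper.map() for a single input record,
// using java.util.regex directly; all values are illustrative.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexMapperSemantics {
  public static void main(String[] args) {
    Pattern pattern = Pattern.compile("dfs[a-z.]+"); // stands in for "mapred.mapper.regex"
    int group = 0;                                   // stands in for "mapred.mapper.regex.group"
    String record = "dfs.block.size dfs.replication fs.default.name";

    Matcher matcher = pattern.matcher(record);
    while (matcher.find()) {
      // The real mapper calls context.write(new Text(...), new LongWritable(1)) here.
      System.out.println(matcher.group(group) + "\t1");
    }
    // Prints "dfs.block.size<TAB>1" and "dfs.replication<TAB>1", one per line.
  }
}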