On Oct 17, 2007, at 1:54 AM, Robert Jessop wrote:
My main questions are really to do with the finer points of the C++
vs Java implementations. I've not coded Java in years so I'm really
interested in how easy it is to simply wrap the C++ components for
the map and reduce phases.
There is a C++ API that is called Pipes.
http://lucene.apache.org/hadoop/api/org/apache/hadoop/mapred/pipes/package-summary.html
and an example in src/examples/pipes. In particular, the famous word
count example in C++ looks like:
#include "hadoop/Pipes.hh"
#include "hadoop/TemplateFactory.hh"
#include "hadoop/StringUtils.hh"
// Mapper: splits each input line on single spaces and emits a
// ("word", "1") pair for every token produced.
class WordCountMap: public HadoopPipes::Mapper {
public:
  // The task context is required by the Pipes factory but unused here.
  WordCountMap(HadoopPipes::TaskContext& context){}
  void map(HadoopPipes::MapContext& context) {
    const std::vector<std::string> tokens =
        HadoopUtils::splitString(context.getInputValue(), " ");
    for (const std::string& token : tokens) {
      context.emit(token, "1");
    }
  }
};
// Reducer: adds up all the per-occurrence "1" counts delivered for a
// key and emits a single (word, total) pair.
class WordCountReduce: public HadoopPipes::Reducer {
public:
  // The task context is required by the Pipes factory but unused here.
  WordCountReduce(HadoopPipes::TaskContext& context){}
  void reduce(HadoopPipes::ReduceContext& context) {
    int total = 0;
    while (context.nextValue())
      total += HadoopUtils::toInt(context.getInputValue());
    context.emit(context.getInputKey(), HadoopUtils::toString(total));
  }
};
int main(int argc, char *argv[]) {
return HadoopPipes::runTask
(HadoopPipes::TemplateFactory<WordCountMap,
WordCountReduce>());
}