Added: lucene/hadoop/trunk/src/examples/pipes/configure.ac URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/configure.ac?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/configure.ac (added) +++ lucene/hadoop/trunk/src/examples/pipes/configure.ac Wed May 16 12:23:48 2007 @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ(2.59) +AC_INIT(hadoop-pipes-examples, 0.13.0, [EMAIL PROTECTED]) + +AM_INIT_AUTOMAKE([subdir-objects foreign no-dist]) + +AC_CONFIG_SRCDIR([impl/wordcount-simple.cc]) +AC_CONFIG_HEADER([impl/config.h]) +AC_CONFIG_FILES([Makefile]) + +AC_PREFIX_DEFAULT(`pwd`/../install) + +USE_HADOOP_PIPES + +# Checks for programs. +AC_PROG_CXX +AC_PROG_INSTALL +AC_PROG_LIBTOOL + +# Checks for libraries. + +# Checks for header files. +AC_LANG(C++) +AC_CHECK_HEADERS([unistd.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_HEADER_STDBOOL +AC_C_CONST +AC_TYPE_OFF_T +AC_TYPE_SIZE_T +AC_FUNC_STRERROR_R + +# Checks for library functions. +AC_CHECK_FUNCS([mkdir uname]) +AC_OUTPUT
Added: lucene/hadoop/trunk/src/examples/pipes/depcomp URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/depcomp?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/depcomp (added) +++ lucene/hadoop/trunk/src/examples/pipes/depcomp Wed May 16 12:23:48 2007 @@ -0,0 +1,522 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2004-05-31.23 + +# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva <[EMAIL PROTECTED]>. + +case $1 in + '') + echo "$0: No command. Try \`$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by `PROGRAMS ARGS'. + object Object file output by `PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputing dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to <[EMAIL PROTECTED]>. +EOF + exit 0 + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit 0 + ;; +esac + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. + "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +## The second -e expression handles DOS-style file names with drive letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. On the theory +## that the space means something, we add a space to the output as +## well. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like `#:fec' to the end of the + # dependency line. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ + tr ' +' ' ' >> $depfile + echo >> $depfile + + # The second pass generates a dummy entry for each header file. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> $depfile + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts `$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` + tmpdepfile="$stripped.u" + if test "$libtool" = yes; then + "$@" -Wc,-M + else + "$@" -M + fi + stat=$? + + if test -f "$tmpdepfile"; then : + else + stripped=`echo "$stripped" | sed 's,^.*/,,'` + tmpdepfile="$stripped.u" + fi + + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + + if test -f "$tmpdepfile"; then + outname="$stripped.o" + # Each line is of the form `foo.o: dependent.h'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" + sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +icc) + # Intel's C compiler understands `-MD -MF file'. However on + # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c + # ICC 7.0 will fill foo.d with something like + # foo.o: sub/foo.c + # foo.o: sub/foo.h + # which is wrong. We want: + # sub/foo.o: sub/foo.c + # sub/foo.o: sub/foo.h + # sub/foo.c: + # sub/foo.h: + # ICC 7.1 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using \ : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | + sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in `foo.d' instead, so we check for that too. + # Subdirectories are respected. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + + if test "$libtool" = yes; then + # Dependencies are output in .lo.d with libtool 1.4. + # With libtool 1.5 they are output both in $dir.libs/$base.o.d + # and in $dir.libs/$base.o.d and $dir$base.o.d. We process the + # latter, because the former will be cleaned when $dir.libs is + # erased. + tmpdepfile1="$dir.libs/$base.lo.d" + tmpdepfile2="$dir$base.o.d" + tmpdepfile3="$dir.libs/$base.d" + "$@" -Wc,-MD + else + tmpdepfile1="$dir$base.o.d" + tmpdepfile2="$dir$base.d" + tmpdepfile3="$dir$base.d" + "$@" -MD + fi + + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + if test -f "$tmpdepfile1"; then + tmpdepfile="$tmpdepfile1" + elif test -f "$tmpdepfile2"; then + tmpdepfile="$tmpdepfile2" + else + tmpdepfile="$tmpdepfile3" + fi + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. + sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for `:' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. + "$@" $dashmflag | + sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no + for arg in "$@"; do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix="`echo $object | sed 's/^.*\././'`" + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E | + sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | + sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + "$@" || exit $? + IFS=" " + for arg + do + case "$arg" in + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" + echo " " >> "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: Added: lucene/hadoop/trunk/src/examples/pipes/impl/config.h.in URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/impl/config.h.in?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/impl/config.h.in (added) +++ lucene/hadoop/trunk/src/examples/pipes/impl/config.h.in Wed May 16 12:23:48 2007 @@ -0,0 +1,97 @@ +/* impl/config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#undef HAVE_DECL_STRERROR_R + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `mkdir' function. */ +#undef HAVE_MKDIR + +/* Define to 1 if stdbool.h conforms to C99. */ +#undef HAVE_STDBOOL_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror_r' function. */ +#undef HAVE_STRERROR_R + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the `uname' function. */ +#undef HAVE_UNAME + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if strerror_r returns char *. */ +#undef STRERROR_R_CHAR_P + +/* Version number of package */ +#undef VERSION + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `long' if <sys/types.h> does not define. */ +#undef off_t + +/* Define to `unsigned' if <sys/types.h> does not define. */ +#undef size_t Added: lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-nopipe.cc URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-nopipe.cc?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-nopipe.cc (added) +++ lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-nopipe.cc Wed May 16 12:23:48 2007 @@ -0,0 +1,109 @@ +#include "hadoop/Pipes.hh" +#include "hadoop/TemplateFactory.hh" +#include "hadoop/StringUtils.hh" +#include "hadoop/SerialUtils.hh" + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> + +class WordCountMap: public HadoopPipes::Mapper { +public: + WordCountMap(HadoopPipes::MapContext& context){} + void map(HadoopPipes::MapContext& context) { + std::vector<std::string> words = + HadoopUtils::splitString(context.getInputValue(), " "); + for(unsigned int i=0; i < words.size(); ++i) { + context.emit(words[i], "1"); + } + } +}; + +class WordCountReduce: public HadoopPipes::Reducer { +public: + WordCountReduce(HadoopPipes::ReduceContext& context){} + void reduce(HadoopPipes::ReduceContext& context) { + int sum = 0; + while (context.nextValue()) { + sum += HadoopUtils::toInt(context.getInputValue()); + } + context.emit(context.getInputKey(), HadoopUtils::toString(sum)); + } +}; + +class WordCountReader: public HadoopPipes::RecordReader { +private: + int64_t bytesTotal; + int64_t bytesRead; + FILE* file; +public: + WordCountReader(HadoopPipes::MapContext& context) { + std::string filename; + HadoopUtils::StringInStream stream(context.getInputSplit()); + HadoopUtils::deserializeString(filename, stream); + struct stat statResult; + stat(filename.c_str(), &statResult); + bytesTotal = statResult.st_size; + bytesRead = 0; + file = fopen(filename.c_str(), "rt"); + HADOOP_ASSERT(file != NULL, "failed to open " + filename); + } + + ~WordCountReader() { + fclose(file); + } + + virtual bool next(std::string& key, std::string& value) { + key = HadoopUtils::toString(ftell(file)); + int ch = getc(file); + bytesRead += 1; + value.clear(); + while (ch != -1 && ch != '\n') { + value += ch; + ch = getc(file); + bytesRead += 1; + } + return ch != -1; + } + + /** + * The progress of the record reader through the split as a value between + * 0.0 and 1.0. + */ + virtual float getProgress() { + if (bytesTotal > 0) { + return bytesRead / bytesTotal; + } else { + return 1.0f; + } + } +}; + +class WordCountWriter: public HadoopPipes::RecordWriter { +private: + FILE* file; +public: + WordCountWriter(HadoopPipes::ReduceContext& context) { + const HadoopPipes::JobConf* job = context.getJobConf(); + int part = job->getInt("mapred.task.partition"); + std::string outDir = job->get("mapred.output.dir"); + mkdir(outDir.c_str(), 0777); + std::string outFile = outDir + "/part-" + HadoopUtils::toString(part); + file = fopen(outFile.c_str(), "wt"); + } + + ~WordCountWriter() { + fclose(file); + } + + void emit(const std::string& key, const std::string& value) { + fprintf(file, "%s -> %s\n", key.c_str(), value.c_str()); + } +}; + +int main(int argc, char *argv[]) { + return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordCountMap, + WordCountReduce, void, void, WordCountReader, + WordCountWriter>()); +} + Added: lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-part.cc URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-part.cc?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-part.cc (added) +++ lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-part.cc Wed May 16 12:23:48 2007 @@ -0,0 +1,42 @@ +#include "hadoop/Pipes.hh" +#include "hadoop/TemplateFactory.hh" +#include "hadoop/StringUtils.hh" + +class WordCountMap: public HadoopPipes::Mapper { +public: + WordCountMap(HadoopPipes::TaskContext& context){} + void map(HadoopPipes::MapContext& context) { + std::vector<std::string> words = + HadoopUtils::splitString(context.getInputValue(), " "); + for(unsigned int i=0; i < words.size(); ++i) { + context.emit(words[i], "1"); + } + } +}; + +class WordCountReduce: public HadoopPipes::Reducer { +public: + WordCountReduce(HadoopPipes::TaskContext& context){} + void reduce(HadoopPipes::ReduceContext& context) { + int sum = 0; + while (context.nextValue()) { + sum += HadoopUtils::toInt(context.getInputValue()); + } + context.emit(context.getInputKey(), HadoopUtils::toString(sum)); + } +}; + +class WordCountPartitioner: public HadoopPipes::Partitioner { +public: + WordCountPartitioner(HadoopPipes::TaskContext& context){} + virtual int partition(const std::string& key, int numOfReduces) { + return 0; + } +}; + +int main(int argc, char *argv[]) { + return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordCountMap, + WordCountReduce,WordCountPartitioner, + WordCountReduce>()); +} + Added: lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-simple.cc URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-simple.cc?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-simple.cc (added) +++ lucene/hadoop/trunk/src/examples/pipes/impl/wordcount-simple.cc Wed May 16 12:23:48 2007 @@ -0,0 +1,33 @@ +#include "hadoop/Pipes.hh" +#include "hadoop/TemplateFactory.hh" +#include "hadoop/StringUtils.hh" + +class WordCountMap: public HadoopPipes::Mapper { +public: + WordCountMap(HadoopPipes::TaskContext& context){} + void map(HadoopPipes::MapContext& context) { + std::vector<std::string> words = + HadoopUtils::splitString(context.getInputValue(), " "); + for(unsigned int i=0; i < words.size(); ++i) { + context.emit(words[i], "1"); + } + } +}; + +class WordCountReduce: public HadoopPipes::Reducer { +public: + WordCountReduce(HadoopPipes::TaskContext& context){} + void reduce(HadoopPipes::ReduceContext& context) { + int sum = 0; + while (context.nextValue()) { + sum += HadoopUtils::toInt(context.getInputValue()); + } + context.emit(context.getInputKey(), HadoopUtils::toString(sum)); + } +}; + +int main(int argc, char *argv[]) { + return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordCountMap, + WordCountReduce>()); +} + Added: lucene/hadoop/trunk/src/examples/pipes/install-sh URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/pipes/install-sh?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/examples/pipes/install-sh (added) +++ lucene/hadoop/trunk/src/examples/pipes/install-sh Wed May 16 12:23:48 2007 @@ -0,0 +1,322 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2004-07-05.00 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +chmodcmd="$chmodprog 0755" +chowncmd= +chgrpcmd= +stripcmd= +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src= +dst= +dir_arg= +dstarg= +no_target_directory= + +usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: +-c (ignored) +-d create directories instead of installing files. +-g GROUP $chgrpprog installed files to GROUP. +-m MODE $chmodprog installed files to MODE. +-o USER $chownprog installed files to USER. +-s $stripprog installed files. +-t DIRECTORY install into DIRECTORY. +-T report an error if DSTFILE is a directory. +--help display this help and exit. +--version display version info and exit. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG +" + +while test -n "$1"; do + case $1 in + -c) shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + --help) echo "$usage"; exit 0;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t) dstarg=$2 + shift + shift + continue;; + + -T) no_target_directory=true + shift + continue;; + + --version) echo "$0 $scriptversion"; exit 0;; + + *) # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + test -n "$dir_arg$dstarg" && break + # Otherwise, the last argument is the destination. Remove it from [EMAIL PROTECTED] + for arg + do + if test -n "$dstarg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dstarg" + shift # fnord + fi + shift # arg + dstarg=$arg + done + break;; + esac +done + +if test -z "$1"; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src ;; + esac + + if test -n "$dir_arg"; then + dst=$src + src= + + if test -d "$dst"; then + mkdircmd=: + chmodcmd= + else + mkdircmd=$mkdirprog + fi + else + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dstarg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dstarg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst ;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dstarg: Is a directory" >&2 + exit 1 + fi + dst=$dst/`basename "$src"` + fi + fi + + # This sed command emulates the dirname command. + dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + + # Make sure that the destination directory exists. + + # Skip lots of stat calls in the usual case. + if test ! -d "$dstdir"; then + defaultIFS=' + ' + IFS="${IFS-$defaultIFS}" + + oIFS=$IFS + # Some sh's can't handle IFS=/ for some reason. + IFS='%' + set - `echo "$dstdir" | sed -e 's@/@[EMAIL PROTECTED]' -e '[EMAIL PROTECTED]@/@'` + IFS=$oIFS + + pathcomp= + + while test $# -ne 0 ; do + pathcomp=$pathcomp$1 + shift + if test ! -d "$pathcomp"; then + $mkdirprog "$pathcomp" + # mkdir can fail with a `File exist' error in case several + # install-sh are creating the directory concurrently. This + # is OK. + test -d "$pathcomp" || exit + fi + pathcomp=$pathcomp/ + done + fi + + if test -n "$dir_arg"; then + $doit $mkdircmd "$dst" \ + && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } + + else + dstfile=`basename "$dst"` + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0 + trap '(exit $?); exit' 1 2 13 15 + + # Copy the file name to the temp name. + $doit $cpprog "$src" "$dsttmp" && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && + + # Now rename the file to the real destination. + { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ + || { + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + if test -f "$dstdir/$dstfile"; then + $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ + || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ + || { + echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 + (exit 1); exit + } + else + : + fi + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" + } + } + fi || { (exit 1); exit; } +done + +# The final little trick to "correctly" pass the exit status to the exit trap. +{ + (exit 0); exit +} + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: