Added: lucene/hadoop/trunk/src/c++/utils/configure.ac URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/utils/configure.ac?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/c++/utils/configure.ac (added) +++ lucene/hadoop/trunk/src/c++/utils/configure.ac Wed May 16 12:23:48 2007 @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ(2.59) +AC_INIT(hadoop-utils, 0.13.0, [EMAIL PROTECTED]) + +AM_INIT_AUTOMAKE([subdir-objects foreign no-dist]) + +AC_CONFIG_SRCDIR([impl/SerialUtils.cc]) +AC_CONFIG_HEADER([impl/config.h]) +AC_CONFIG_FILES([Makefile]) + +AC_PREFIX_DEFAULT(`pwd`/../install) + +CHECK_INSTALL_CFLAG +HADOOP_UTILS_SETUP + +# Checks for programs. +AC_PROG_CXX +AC_PROG_LIBTOOL + +# Checks for libraries. + +# Checks for header files. +AC_LANG(C++) +AC_CHECK_HEADERS([unistd.h]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_HEADER_STDBOOL +AC_C_CONST +AC_TYPE_OFF_T +AC_TYPE_SIZE_T +AC_FUNC_STRERROR_R + +# Checks for library functions. +AC_CHECK_FUNCS([mkdir uname]) +AC_OUTPUT
Added: lucene/hadoop/trunk/src/c++/utils/depcomp URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/utils/depcomp?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/c++/utils/depcomp (added) +++ lucene/hadoop/trunk/src/c++/utils/depcomp Wed May 16 12:23:48 2007 @@ -0,0 +1,522 @@ +#! /bin/sh +# depcomp - compile a program generating dependencies as side-effects + +scriptversion=2004-05-31.23 + +# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva <[EMAIL PROTECTED]>. + +case $1 in + '') + echo "$0: No command. Try \`$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by `PROGRAMS ARGS'. + object Object file output by `PROGRAMS ARGS'. + DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputing dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to <[EMAIL PROTECTED]>. +EOF + exit 0 + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit 0 + ;; +esac + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. + "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +## The second -e expression handles DOS-style file names with drive letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. On the theory +## that the space means something, we add a space to the output as +## well. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like `#:fec' to the end of the + # dependency line. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ + tr ' +' ' ' >> $depfile + echo >> $depfile + + # The second pass generates a dummy entry for each header file. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> $depfile + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts `$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` + tmpdepfile="$stripped.u" + if test "$libtool" = yes; then + "$@" -Wc,-M + else + "$@" -M + fi + stat=$? + + if test -f "$tmpdepfile"; then : + else + stripped=`echo "$stripped" | sed 's,^.*/,,'` + tmpdepfile="$stripped.u" + fi + + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + + if test -f "$tmpdepfile"; then + outname="$stripped.o" + # Each line is of the form `foo.o: dependent.h'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" + sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +icc) + # Intel's C compiler understands `-MD -MF file'. However on + # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c + # ICC 7.0 will fill foo.d with something like + # foo.o: sub/foo.c + # foo.o: sub/foo.h + # which is wrong. We want: + # sub/foo.o: sub/foo.c + # sub/foo.o: sub/foo.h + # sub/foo.c: + # sub/foo.h: + # ICC 7.1 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using \ : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | + sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in `foo.d' instead, so we check for that too. + # Subdirectories are respected. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + + if test "$libtool" = yes; then + # Dependencies are output in .lo.d with libtool 1.4. + # With libtool 1.5 they are output both in $dir.libs/$base.o.d + # and in $dir.libs/$base.o.d and $dir$base.o.d. We process the + # latter, because the former will be cleaned when $dir.libs is + # erased. + tmpdepfile1="$dir.libs/$base.lo.d" + tmpdepfile2="$dir$base.o.d" + tmpdepfile3="$dir.libs/$base.d" + "$@" -Wc,-MD + else + tmpdepfile1="$dir$base.o.d" + tmpdepfile2="$dir$base.d" + tmpdepfile3="$dir$base.d" + "$@" -MD + fi + + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + if test -f "$tmpdepfile1"; then + tmpdepfile="$tmpdepfile1" + elif test -f "$tmpdepfile2"; then + tmpdepfile="$tmpdepfile2" + else + tmpdepfile="$tmpdepfile3" + fi + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. + sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for `:' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. + "$@" $dashmflag | + sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no + for arg in "$@"; do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix="`echo $object | sed 's/^.*\././'`" + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E | + sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | + sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + "$@" || exit $? + IFS=" " + for arg + do + case "$arg" in + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" + echo " " >> "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: Added: lucene/hadoop/trunk/src/c++/utils/impl/SerialUtils.cc URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/utils/impl/SerialUtils.cc?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/c++/utils/impl/SerialUtils.cc (added) +++ lucene/hadoop/trunk/src/c++/utils/impl/SerialUtils.cc Wed May 16 12:23:48 2007 @@ -0,0 +1,287 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "hadoop/SerialUtils.hh" +#include "hadoop/StringUtils.hh" + +#include <errno.h> +#include <rpc/xdr.h> +#include <string> + +using std::string; + +namespace HadoopUtils { + + Error::Error(const std::string& msg): error(msg) { + } + + Error::Error(const std::string& msg, + const std::string& file, int line, + const std::string& function) { + error = msg + " at " + file + ":" + toString(line) + + " in " + function; + } + + const std::string& Error::getMessage() const { + return error; + } + + FileInStream::FileInStream() + { + mFile = NULL; + isOwned = false; + } + + bool FileInStream::open(const std::string& name) + { + mFile = fopen(name.c_str(), "rb"); + isOwned = true; + return (mFile != NULL); + } + + bool FileInStream::open(FILE* file) + { + mFile = file; + isOwned = false; + return (mFile != NULL); + } + + void FileInStream::read(void *buf, size_t len) + { + size_t result = fread(buf, len, 1, mFile); + if (result == 0) { + if (feof(mFile)) { + HADOOP_ASSERT(false, "end of file"); + } else { + HADOOP_ASSERT(false, string("read error on file: ") + strerror(errno)); + } + } + } + + bool FileInStream::skip(size_t nbytes) + { + return (0==fseek(mFile, nbytes, SEEK_CUR)); + } + + bool FileInStream::close() + { + int ret = 0; + if (mFile != NULL && isOwned) { + ret = fclose(mFile); + } + mFile = NULL; + return (ret==0); + } + + FileInStream::~FileInStream() + { + if (mFile != NULL) { + close(); + } + } + + FileOutStream::FileOutStream() + { + mFile = NULL; + isOwned = false; + } + + bool FileOutStream::open(const std::string& name, bool overwrite) + { + if (!overwrite) { + mFile = fopen(name.c_str(), "rb"); + if (mFile != NULL) { + fclose(mFile); + return false; + } + } + mFile = fopen(name.c_str(), "wb"); + isOwned = true; + return (mFile != NULL); + } + + bool FileOutStream::open(FILE* file) + { + mFile = file; + isOwned = false; + return (mFile != NULL); + } + + void FileOutStream::write(const void* buf, size_t len) + { + size_t result = fwrite(buf, len, 1, mFile); + HADOOP_ASSERT(result == 1, + string("write error to file: ") + strerror(errno)); + } + + bool FileOutStream::advance(size_t nbytes) + { + return (0==fseek(mFile, nbytes, SEEK_CUR)); + } + + bool FileOutStream::close() + { + int ret = 0; + if (mFile != NULL && isOwned) { + ret = fclose(mFile); + } + mFile = NULL; + return (ret == 0); + } + + FileOutStream::~FileOutStream() + { + if (mFile != NULL) { + close(); + } + } + + StringInStream::StringInStream(const std::string& str): buffer(str) { + itr = buffer.begin(); + } + + void StringInStream::read(void *buf, size_t buflen) { + size_t bytes = 0; + char* output = (char*) buf; + std::string::const_iterator end = buffer.end(); + while (bytes < buflen) { + output[bytes++] = *itr; + ++itr; + if (itr == end) { + break; + } + } + HADOOP_ASSERT(bytes == buflen, "unexpected end of string reached"); + } + + void serializeInt(int32_t t, OutStream& stream) { + serializeLong(t,stream); + } + + void serializeLong(int64_t t, OutStream& stream) + { + if (t >= -112 && t <= 127) { + int8_t b = t; + stream.write(&b, 1); + return; + } + + int8_t len = -112; + if (t < 0) { + t ^= -1ll; // reset the sign bit + len = -120; + } + + uint64_t tmp = t; + while (tmp != 0) { + tmp = tmp >> 8; + len--; + } + + stream.write(&len, 1); + len = (len < -120) ? -(len + 120) : -(len + 112); + + for (uint32_t idx = len; idx != 0; idx--) { + uint32_t shiftbits = (idx - 1) * 8; + uint64_t mask = 0xFFll << shiftbits; + uint8_t b = (t & mask) >> shiftbits; + stream.write(&b, 1); + } + } + + int32_t deserializeInt(InStream& stream) { + return deserializeLong(stream); + } + + int64_t deserializeLong(InStream& stream) + { + int8_t b; + stream.read(&b, 1); + if (b >= -112) { + return b; + } + bool negative; + int len; + if (b < -120) { + negative = true; + len = -120 - b; + } else { + negative = false; + len = -112 - b; + } + uint8_t barr[len]; + stream.read(barr, len); + int64_t t = 0; + for (int idx = 0; idx < len; idx++) { + t = t << 8; + t |= (barr[idx] & 0xFF); + } + if (negative) { + t ^= -1ll; + } + return t; + } + + void serializeFloat(float t, OutStream& stream) + { + char buf[sizeof(float)]; + XDR xdrs; + xdrmem_create(&xdrs, buf, sizeof(float), XDR_ENCODE); + xdr_float(&xdrs, &t); + stream.write(buf, sizeof(float)); + } + + void deserializeFloat(float& t, InStream& stream) + { + char buf[sizeof(float)]; + stream.read(buf, sizeof(float)); + XDR xdrs; + xdrmem_create(&xdrs, buf, sizeof(float), XDR_DECODE); + xdr_float(&xdrs, &t); + } + + void serializeString(const std::string& t, OutStream& stream) + { + serializeInt(t.length(), stream); + if (t.length() > 0) { + stream.write(t.data(), t.length()); + } + } + + void deserializeString(std::string& t, InStream& stream) + { + int32_t len = deserializeInt(stream); + if (len > 0) { + // resize the string to the right length + t.resize(len); + // read into the string in 64k chunks + const int bufSize = 65536; + int offset = 0; + char buf[bufSize]; + while (len > 0) { + int chunkLength = len > bufSize ? bufSize : len; + stream.read(buf, chunkLength); + t.replace(offset, chunkLength, buf, chunkLength); + offset += chunkLength; + len -= chunkLength; + } + } else { + t.clear(); + } + } + +} Added: lucene/hadoop/trunk/src/c++/utils/impl/StringUtils.cc URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/utils/impl/StringUtils.cc?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/c++/utils/impl/StringUtils.cc (added) +++ lucene/hadoop/trunk/src/c++/utils/impl/StringUtils.cc Wed May 16 12:23:48 2007 @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "hadoop/StringUtils.hh" +#include "hadoop/SerialUtils.hh" + +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <strings.h> +#include <sys/time.h> + +using std::string; +using std::vector; + +namespace HadoopUtils { + + string toString(int32_t x) { + char str[100]; + sprintf(str, "%d", x); + return str; + } + + int toInt(const string& val) { + int result; + char trash; + int num = sscanf(val.c_str(), "%d%c", &result, &trash); + HADOOP_ASSERT(num == 1, + "Problem converting " + val + " to integer."); + return result; + } + + float toFloat(const string& val) { + float result; + char trash; + int num = sscanf(val.c_str(), "%f%c", &result, &trash); + HADOOP_ASSERT(num == 1, + "Problem converting " + val + " to float."); + return result; + } + + bool toBool(const string& val) { + if (val == "true") { + return true; + } else if (val == "false") { + return false; + } else { + HADOOP_ASSERT(false, + "Problem converting " + val + " to boolean."); + } + } + + /** + * Get the current time in the number of milliseconds since 1970. + */ + uint64_t getCurrentMillis() { + struct timeval tv; + struct timezone tz; + int sys = gettimeofday(&tv, &tz); + HADOOP_ASSERT(sys != -1, strerror(errno)); + return tv.tv_sec * 1000 + tv.tv_usec / 1000; + } + + vector<string> splitString(const std::string& str, + const char* separator) { + vector<string> result; + string::size_type prev_pos=0; + string::size_type pos=0; + while ((pos = str.find_first_of(separator, prev_pos)) != string::npos) { + if (prev_pos < pos) { + result.push_back(str.substr(prev_pos, pos-prev_pos)); + } + prev_pos = pos + 1; + } + if (prev_pos < str.size()) { + result.push_back(str.substr(prev_pos)); + } + return result; + } + + string quoteString(const string& str, + const char* deliminators) { + + string result(str); + for(int i=result.length() -1; i >= 0; --i) { + char ch = result[i]; + if (!isprint(ch) || + ch == '\\' || + strchr(deliminators, ch)) { + switch (ch) { + case '\\': + result.replace(i, 1, "\\\\"); + break; + case '\t': + result.replace(i, 1, "\\t"); + break; + case '\n': + result.replace(i, 1, "\\n"); + break; + case ' ': + result.replace(i, 1, "\\s"); + break; + default: + char buff[4]; + sprintf(buff, "\\%02x", static_cast<unsigned char>(result[i])); + result.replace(i, 1, buff); + } + } + } + return result; + } + + string unquoteString(const string& str) { + string result(str); + string::size_type current = result.find('\\'); + while (current != string::npos) { + if (current + 1 < result.size()) { + char new_ch; + int num_chars; + if (isxdigit(result[current+1])) { + num_chars = 2; + HADOOP_ASSERT(current + num_chars < result.size(), + "escape pattern \\<hex><hex> is missing second digit in '" + + str + "'"); + char sub_str[3]; + sub_str[0] = result[current+1]; + sub_str[1] = result[current+2]; + sub_str[2] = '\0'; + char* end_ptr = NULL; + long int int_val = strtol(sub_str, &end_ptr, 16); + HADOOP_ASSERT(*end_ptr == '\0' && int_val >= 0, + "escape pattern \\<hex><hex> is broken in '" + str + "'"); + new_ch = static_cast<char>(int_val); + } else { + num_chars = 1; + switch(result[current+1]) { + case '\\': + new_ch = '\\'; + break; + case 't': + new_ch = '\t'; + break; + case 'n': + new_ch = '\n'; + break; + case 's': + new_ch = ' '; + break; + default: + string msg("unknow n escape character '"); + msg += result[current+1]; + HADOOP_ASSERT(false, msg + "' found in '" + str + "'"); + } + } + result.replace(current, 1 + num_chars, 1, new_ch); + current = result.find('\\', current+1); + } else { + HADOOP_ASSERT(false, "trailing \\ in '" + str + "'"); + } + } + return result; + } + +} Added: lucene/hadoop/trunk/src/c++/utils/impl/config.h.in URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/utils/impl/config.h.in?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/c++/utils/impl/config.h.in (added) +++ lucene/hadoop/trunk/src/c++/utils/impl/config.h.in Wed May 16 12:23:48 2007 @@ -0,0 +1,97 @@ +/* impl/config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#undef HAVE_DECL_STRERROR_R + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `mkdir' function. */ +#undef HAVE_MKDIR + +/* Define to 1 if stdbool.h conforms to C99. */ +#undef HAVE_STDBOOL_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror_r' function. */ +#undef HAVE_STRERROR_R + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the `uname' function. */ +#undef HAVE_UNAME + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if strerror_r returns char *. */ +#undef STRERROR_R_CHAR_P + +/* Version number of package */ +#undef VERSION + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# undef _GNU_SOURCE +#endif + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `long' if <sys/types.h> does not define. */ +#undef off_t + +/* Define to `unsigned' if <sys/types.h> does not define. */ +#undef size_t Added: lucene/hadoop/trunk/src/c++/utils/install-sh URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/utils/install-sh?view=auto&rev=538693 ============================================================================== --- lucene/hadoop/trunk/src/c++/utils/install-sh (added) +++ lucene/hadoop/trunk/src/c++/utils/install-sh Wed May 16 12:23:48 2007 @@ -0,0 +1,322 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2004-07-05.00 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +chmodcmd="$chmodprog 0755" +chowncmd= +chgrpcmd= +stripcmd= +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src= +dst= +dir_arg= +dstarg= +no_target_directory= + +usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: +-c (ignored) +-d create directories instead of installing files. +-g GROUP $chgrpprog installed files to GROUP. +-m MODE $chmodprog installed files to MODE. +-o USER $chownprog installed files to USER. +-s $stripprog installed files. +-t DIRECTORY install into DIRECTORY. +-T report an error if DSTFILE is a directory. +--help display this help and exit. +--version display version info and exit. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG +" + +while test -n "$1"; do + case $1 in + -c) shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + --help) echo "$usage"; exit 0;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t) dstarg=$2 + shift + shift + continue;; + + -T) no_target_directory=true + shift + continue;; + + --version) echo "$0 $scriptversion"; exit 0;; + + *) # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + test -n "$dir_arg$dstarg" && break + # Otherwise, the last argument is the destination. Remove it from [EMAIL PROTECTED] + for arg + do + if test -n "$dstarg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dstarg" + shift # fnord + fi + shift # arg + dstarg=$arg + done + break;; + esac +done + +if test -z "$1"; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src ;; + esac + + if test -n "$dir_arg"; then + dst=$src + src= + + if test -d "$dst"; then + mkdircmd=: + chmodcmd= + else + mkdircmd=$mkdirprog + fi + else + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dstarg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dstarg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst ;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dstarg: Is a directory" >&2 + exit 1 + fi + dst=$dst/`basename "$src"` + fi + fi + + # This sed command emulates the dirname command. + dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + + # Make sure that the destination directory exists. + + # Skip lots of stat calls in the usual case. + if test ! -d "$dstdir"; then + defaultIFS=' + ' + IFS="${IFS-$defaultIFS}" + + oIFS=$IFS + # Some sh's can't handle IFS=/ for some reason. + IFS='%' + set - `echo "$dstdir" | sed -e 's@/@[EMAIL PROTECTED]' -e '[EMAIL PROTECTED]@/@'` + IFS=$oIFS + + pathcomp= + + while test $# -ne 0 ; do + pathcomp=$pathcomp$1 + shift + if test ! -d "$pathcomp"; then + $mkdirprog "$pathcomp" + # mkdir can fail with a `File exist' error in case several + # install-sh are creating the directory concurrently. This + # is OK. + test -d "$pathcomp" || exit + fi + pathcomp=$pathcomp/ + done + fi + + if test -n "$dir_arg"; then + $doit $mkdircmd "$dst" \ + && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } + + else + dstfile=`basename "$dst"` + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0 + trap '(exit $?); exit' 1 2 13 15 + + # Copy the file name to the temp name. + $doit $cpprog "$src" "$dsttmp" && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && + + # Now rename the file to the real destination. + { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ + || { + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + if test -f "$dstdir/$dstfile"; then + $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ + || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ + || { + echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 + (exit 1); exit + } + else + : + fi + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" + } + } + fi || { (exit 1); exit; } +done + +# The final little trick to "correctly" pass the exit status to the exit trap. +{ + (exit 0); exit +} + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: