Modified: pig/branches/spark/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/bin/pig (original)
+++ pig/branches/spark/bin/pig Wed Feb 22 09:43:41 2017
@@ -330,7 +330,8 @@ HADOOP_CORE_JAR=`echo ${HADOOP_HOME}/had
 if [ -z "$HADOOP_CORE_JAR" ]; then
     HADOOP_VERSION=2
 else
-    HADOOP_VERSION=1
+    echo "Pig requires Hadoop 2 to be present in HADOOP_HOME (currently: $HADOOP_HOME). Please install Hadoop 2.x"
+    exit 1
 fi
 
 # if using HBase, likely want to include HBase jars and config
@@ -439,11 +440,7 @@ if [ -n "$HADOOP_BIN" ]; then
     if [ -n "$PIG_JAR" ]; then
         CLASSPATH=${CLASSPATH}:$PIG_JAR
     else
-        if [ "$HADOOP_VERSION" == "1" ]; then
-            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again"
-        else
-            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant -Dhadoopversion=23 jar', and try again"
-        fi
+        echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again"
         exit 1
     fi
 
@@ -464,8 +461,8 @@ if [ -n "$HADOOP_BIN" ]; then
         exec "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
     fi
 else
-    # use hadoop-core.jar to run local mode
-    PIG_JAR=`echo $PIG_HOME/pig*-core-h1.jar`
+    # use bundled hadoop to run local mode
+    PIG_JAR=`echo $PIG_HOME/pig*-core-h2.jar`
 
     if [ -n "$PIG_JAR" ]; then
         CLASSPATH="${CLASSPATH}:$PIG_JAR"
@@ -474,12 +471,12 @@ else
         exit 1
     fi
 
-    for f in $PIG_HOME/lib/h1/*.jar; do
+    for f in $PIG_HOME/lib/h2/*.jar; do
         CLASSPATH=${CLASSPATH}:$f;
     done
 
-    # Add bundled hadoop-core.jar
-    for f in $PIG_HOME/lib/hadoop1-runtime/*.jar; do
+    # Add bundled hadoop jars
+    for f in $PIG_HOME/lib/hadoop2-runtime/*.jar; do
         CLASSPATH=${CLASSPATH}:$f;
     done
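[Both launchers in this commit (the shell script above and pig.py below) replace the old Hadoop 1 fallback with a hard failure when Hadoop 2 cannot be found. As an illustrative sketch only -- not part of this patch -- the same guard expressed in Java against hadoop-common's VersionInfo; the class name and message are hypothetical:

    // Hypothetical Java equivalent of the launcher's "Hadoop 2 or fail" guard.
    // Assumes hadoop-common is on the classpath; VersionInfo.getVersion()
    // reports the version of the Hadoop jars actually loaded.
    import org.apache.hadoop.util.VersionInfo;

    public final class HadoopVersionGuard {
        public static void main(String[] args) {
            String v = VersionInfo.getVersion();   // e.g. "2.7.3"
            if (!v.startsWith("2.")) {
                System.err.println("Pig requires Hadoop 2.x, found " + v);
                System.exit(1);
            }
        }
    }
]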
Modified: pig/branches/spark/bin/pig.py URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig.py?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/bin/pig.py (original) +++ pig/branches/spark/bin/pig.py Wed Feb 22 09:43:41 2017 @@ -338,7 +338,7 @@ hadoopCoreJars = glob.glob(os.path.join( if len(hadoopCoreJars) == 0: hadoopVersion = 2 else: - hadoopVersion = 1 + sys.exit("Cannot locate Hadoop 2 binaries, please install Hadoop 2.x and try again.") if hadoopBin != "": if debug == True: @@ -361,10 +361,7 @@ if hadoopBin != "": if len(pigJars) == 1: pigJar = pigJars[0] else: - if hadoopVersion == 1: - sys.exit("Cannot locate pig-core-h1.jar do 'ant jar', and try again") - else: - sys.exit("Cannot locate pig-core-h2.jar do 'ant -Dhadoopversion=23 jar', and try again") + sys.exit("Cannot locate pig-core-h2.jar do 'ant jar', and try again") pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h" + str(hadoopVersion), "*.jar")) for jar in pigLibJars: @@ -393,13 +390,13 @@ if hadoopBin != "": else: # fall back to use fat pig.jar if debug == True: - print "Cannot find local hadoop installation, using bundled hadoop 1" - - if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar")): - pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar") + print "Cannot find local hadoop installation, using bundled hadoop 2" + + if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")): + pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar") else: - pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h1.jar")) + pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h2.jar")) if len(pigJars) == 1: pigJar = pigJars[0] @@ -407,15 +404,15 @@ else: elif len(pigJars) > 1: print "Ambiguity with pig jars found the following jars" print pigJars - sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar")) + sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")) else: - sys.exit("Cannot locate pig-core-h1.jar. do 'ant jar' and try again") + sys.exit("Cannot locate pig-core-h2.jar. 
do 'ant jar' and try again") - pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h1", "*.jar")) + pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h2", "*.jar")) for jar in pigLibJars: classpath += os.pathsep + jar - pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop1-runtime", "*.jar")) + pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop2-runtime", "*.jar")) for jar in pigLibJars: classpath += os.pathsep + jar @@ -423,7 +420,7 @@ else: pigClass = "org.apache.pig.Main" if debug == True: print "dry runXXX:" - print "%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs)) + print "%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs)) else: cmdLine = java + ' ' + javaHeapMax + ' ' + pigOpts cmdLine += ' ' + '-classpath ' + classpath + ' ' + pigClass + ' ' + ' '.join(restArgs) Modified: pig/branches/spark/build.xml URL: http://svn.apache.org/viewvc/pig/branches/spark/build.xml?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/build.xml (original) +++ pig/branches/spark/build.xml Wed Feb 22 09:43:41 2017 @@ -20,6 +20,13 @@ xmlns:ivy="antlib:org.apache.ivy.ant"> <!-- Load all the default properties, and any the user wants --> <!-- to contribute (without having to type -D or edit this file --> + + <taskdef resource="net/sf/antcontrib/antcontrib.properties"> + <classpath> + <pathelement location="${basedir}/ivy/ant-contrib-1.0b3.jar"/> + </classpath> + </taskdef> + <property file="${user.home}/build.properties" /> <property file="${basedir}/build.properties" /> @@ -31,11 +38,11 @@ <property name="pigsmoke.pom" value="${basedir}/ivy/pigsmoke.pom" /> <property name="pigunit.pom" value="${basedir}/ivy/pigunit.pom" /> <property name="piggybank.pom" value="${basedir}/ivy/piggybank.pom" /> - <property name="pig.version" value="0.16.0" /> + <property name="pig.version" value="0.17.0" /> <property name="pig.version.suffix" value="-SNAPSHOT" /> <property name="version" value="${pig.version}${pig.version.suffix}" /> <property name="final.name" value="${name}-${version}" /> - <property name="year" value="2007-2012" /> + <property name="year" value="2007-2016" /> <!-- source properties --> <property name="lib.dir" value="${basedir}/lib" /> @@ -70,7 +77,6 @@ <!-- artifact jar file names --> <property name="artifact.pig.jar" value="${final.name}.jar"/> - <property name="artifact.pig-h1.jar" value="${final.name}-h1.jar"/> <property name="artifact.pig-h2.jar" value="${final.name}-h2.jar"/> <property name="artifact.pig-sources.jar" value="${final.name}-sources.jar"/> <property name="artifact.pig-javadoc.jar" value="${final.name}-javadoc.jar"/> @@ -78,15 +84,12 @@ <!-- jar names. 
TODO we might want to use the svn reversion name in the name in case it is a dev version --> <property name="output.jarfile.withouthadoop" value="${build.dir}/${final.name}-withouthadoop.jar" /> - <property name="output.jarfile.withouthadoop-h1" value="${legacy.dir}/${final.name}-withouthadoop-h1.jar" /> <property name="output.jarfile.withouthadoop-h2" value="${legacy.dir}/${final.name}-withouthadoop-h2.jar" /> <property name="output.jarfile.core" value="${build.dir}/${artifact.pig.jar}" /> - <property name="output.jarfile.core-h1" value="${build.dir}/${artifact.pig-h1.jar}" /> <property name="output.jarfile.core-h2" value="${build.dir}/${artifact.pig-h2.jar}" /> <property name="output.jarfile.sources" value="${build.dir}/${artifact.pig-sources.jar}" /> <property name="output.jarfile.javadoc" value="${build.dir}/${artifact.pig-javadoc.jar}" /> <!-- Maintain old pig.jar in top level directory. --> - <property name="output.jarfile.backcompat-core-h1" value="${basedir}/${final.name}-core-h1.jar" /> <property name="output.jarfile.backcompat-core-h2" value="${basedir}/${final.name}-core-h2.jar" /> <!-- test properties --> @@ -107,8 +110,6 @@ <property name="test.spark.file" value="${test.src.dir}/spark-tests"/> <property name="test.spark_local.file" value="${test.src.dir}/spark-local-tests"/> <property name="test.exclude.file" value="${test.src.dir}/excluded-tests"/> - <property name="test.exclude.file.20" value="${test.src.dir}/excluded-tests-20"/> - <property name="test.exclude.file.23" value="${test.src.dir}/excluded-tests-23"/> <property name="test.exclude.file.mr" value="${test.src.dir}/excluded-tests-mr"/> <property name="test.exclude.file.tez" value="${test.src.dir}/excluded-tests-tez"/> <property name="test.exclude.file.spark" value="${test.src.dir}/excluded-tests-spark"/> @@ -155,9 +156,8 @@ <target name="setTezEnv"> <propertyreset name="test.timeout" value="900000" /> - <propertyreset name="hadoopversion" value="23" /> - <propertyreset name="isHadoop23" value="true" /> - <propertyreset name="hbase.hadoop.version" value="hadoop2" /> + <propertyreset name="hadoopversion" value="2" /> + <propertyreset name="isHadoop2" value="true" /> <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" /> <propertyreset name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" /> <propertyreset name="src.exclude.dir" value="" /> @@ -209,40 +209,42 @@ <property name="loglevel" value="quiet" /> <loadproperties srcfile="${ivy.dir}/libraries.properties"/> - <property name="hadoopversion" value="20" /> - <condition property="isHadoop23"> + <!-- + Hadoop master version + (Value 23 is translated for backward compatibility in old build scripts) + --> + <if> <equals arg1="${hadoopversion}" arg2="23"/> - </condition> + <then> + <echo>Property setting hadoopversion=23 is deprecated. Overwriting to hadoopversion=2</echo> + <var name="hadoopversion" unset="true"/> + <property name="hadoopversion" value="2" /> + </then> + </if> + <property name="hadoopversion" value="2" /> - <condition property="hbase.hadoop.version" value="hadoop1" else="hadoop2"> - <not> - <equals arg1="${hadoopversion}" arg2="23"/> - </not> + <condition property="isHadoop2"> + <equals arg1="${hadoopversion}" arg2="2"/> </condition> <!-- HBase master version - Denotes how the HBase dependencies are layout. Value "94" denotes older - format where all HBase code is present in one single jar, which is the - way HBase is available up to version 0.94. 
Value "95" denotes new format - where HBase is cut into multiple dependencies per each major subsystem, - e.g. "client", "server", ... . Only values "94" and "95" are supported - at the moment. + (Value 95 is translated for backward compatibility in old build scripts) --> - <property name="hbaseversion" value="95" /> - - <!-- exclude tez code if not hadoop20 --> - <condition property="src.exclude.dir" value="**/tez/**" else=""> - <not> - <equals arg1="${hadoopversion}" arg2="23"/> - </not> - </condition> + <if> + <equals arg1="${hbaseversion}" arg2="95"/> + <then> + <echo>Property setting hbaseversion=95 is deprecated. Overwriting to hbaseversion=1</echo> + <var name="hbaseversion" unset="true"/> + <property name="hbaseversion" value="1" /> + </then> + </if> + <property name="hbaseversion" value="1" /> <property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" /> <property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" /> - <property name="hadoop.jar" value="hadoop-core-${hadoop-core.version}.jar" /> <property name="asfrepo" value="https://repository.apache.org"/> <property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/> <property name="mvnrepo" value="http://repo2.maven.org/maven2"/> @@ -284,7 +286,7 @@ <property name="xerces.jar" value="${ivy.lib.dir}/xercesImpl-${xerces.version}.jar"/> <property name="jdiff.build.dir" value="${build.docs}/jdiff"/> <property name="jdiff.xml.dir" value="${docs.dir}/jdiff"/> - <property name="jdiff.stable" value="0.15.0"/> + <property name="jdiff.stable" value="0.16.0"/> <property name="jdiff.stable.javadoc" value="http://hadoop.apache.org/${name}/docs/r${jdiff.stable}/api/"/> <!-- Packaging properties --> @@ -392,12 +394,6 @@ <include name="joda-time-${joda-time.version}.jar"/> <include name="automaton-${automaton.version}.jar"/> <include name="jansi-${jansi.version}.jar"/> - <include name="jackson-mapper-asl-${jackson.version}.jar" unless="isHadoop23"/> - <include name="jackson-core-asl-${jackson.version}.jar" unless="isHadoop23"/> - <include name="guava-${guava.version}.jar" unless="isHadoop23"/> - <include name="snappy-java-${snappy.version}.jar" unless="isHadoop23"/> - <include name="asm-${asm.version}.jar" unless="isHadoop23"/> - <include name="scala*.jar"/> <include name="akka*.jar"/> <include name="jcl-over-slf4j*.jar"/> @@ -574,6 +570,7 @@ <echo>*** Building Main Sources ***</echo> <echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo> <echo>*** Else, you will only be warned about deprecations ***</echo> + <echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ***</echo> <compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir}" excludes="${src.exclude.dir}" dist="${build.classes}" cp="classpath" warnings="${javac.args.warnings}" /> <copy todir="${build.classes}/META-INF"> @@ -703,23 +700,6 @@ </target> <!-- ================================================================== --> - <!-- Facede to build pig.jar for both Hadoop 1 and Hadoop 2 --> - <!-- ================================================================== --> - <target name="jar-h12" description="Create pig for both Hadoop 1 and Hadoop 2"> - <propertyreset name="hadoopversion" value="20" /> - <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" /> - <antcall target="clean" inheritRefs="true" inheritall="true"/> - <antcall target="jar" inheritRefs="true" 
inheritall="true"/> - <antcall target="copyHadoop1LocalRuntimeDependencies"/> - <delete dir="${build.dir}" /> - <propertyreset name="hadoopversion" value="23" /> - <propertyreset name="hbase.hadoop.version" value="hadoop2" /> - <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" /> - <propertyreset name="src.exclude.dir" value="" /> - <antcall target="jar" inheritRefs="true" inheritall="true"/> - </target> - - <!-- ================================================================== --> <!-- Make pig.jar --> <!-- ================================================================== --> <target name="jar" depends="compile,ivy-buildJar" description="Create pig core jar"> @@ -727,8 +707,8 @@ <buildJar svnString="${svn.revision}" outputFile="${output.jarfile.withouthadoop}" includedJars="runtime.dependencies-withouthadoop.jar"/> <antcall target="copyCommonDependencies"/> <antcall target="copySparkDependencies"/> - <antcall target="copyh1Dependencies"/> <antcall target="copyh2Dependencies"/> + <antcall target="copyHadoop2LocalRuntimeDependencies" /> </target> <target name="copyCommonDependencies"> @@ -752,7 +732,9 @@ <fileset dir="${ivy.lib.dir}" includes="jruby-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="groovy-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="js-*.jar"/> - <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="htrace-core*incubating.jar"/> + <fileset dir="${ivy.lib.dir}" includes="metrics-core-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop2*.jar"/> <fileset dir="${ivy.lib.dir}" includes="hive-*.jar" excludes="hive-shims-0.*.jar"/> <fileset dir="${ivy.lib.dir}" includes="protobuf-java-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="zookeeper-*.jar"/> @@ -772,24 +754,12 @@ </copy> </target> - <target name="copyh1Dependencies" unless="isHadoop23"> - <mkdir dir="${lib.dir}/h1" /> - <copy todir="${lib.dir}/h1"> - <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/> - <fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/> - <fileset dir="${ivy.lib.dir}" includes="hbase-*hadoop1.jar"/> - </copy> - <copy file="${output.jarfile.core}" tofile="${output.jarfile.backcompat-core-h1}"/> - <mkdir dir="${legacy.dir}" /> - <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h1}"/> - </target> - - <target name="copyh2Dependencies" if="isHadoop23"> + <target name="copyh2Dependencies" if="isHadoop2"> <mkdir dir="${lib.dir}/h2" /> <copy todir="${lib.dir}/h2"> <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/> - <fileset dir="${ivy.lib.dir}" includes="hbase-*hadoop2.jar"/> + <fileset dir="${ivy.lib.dir}" includes="hbase-hadoop2*.jar"/> <fileset dir="${ivy.lib.dir}" includes="tez-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="commons-collections4-*.jar"/> </copy> @@ -798,18 +768,21 @@ <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h2}"/> </target> - <target name="copyHadoop1LocalRuntimeDependencies"> - <mkdir dir="${lib.dir}/hadoop1-runtime" /> - <copy todir="${lib.dir}/hadoop1-runtime"> - <fileset dir="${ivy.lib.dir}" includes="hadoop-core-*.jar"/> + <target name="copyHadoop2LocalRuntimeDependencies"> + <mkdir dir="${lib.dir}/hadoop2-runtime" /> + <copy todir="${lib.dir}/hadoop2-runtime"> + <fileset dir="${ivy.lib.dir}" includes="hadoop-*.jar"/> <fileset 
dir="${ivy.lib.dir}" includes="commons-cli-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="commons-configuration-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-collections-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="commons-lang-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="commons-codec-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="commons-io-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="commons-logging-*.jar"/> - <fileset dir="${ivy.lib.dir}" includes="commons-httpclient-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="httpclient-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="httpcore-*.jar"/> <fileset dir="${ivy.lib.dir}" includes="log4j-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="slf4j-*.jar"/> </copy> </target> @@ -955,6 +928,9 @@ <sysproperty key="test.exec.type" value="${test.exec.type}" /> <sysproperty key="ssh.gateway" value="${ssh.gateway}" /> <sysproperty key="hod.server" value="${hod.server}" /> + <sysproperty key="build.classes" value="${build.classes}" /> + <sysproperty key="test.build.classes" value="${test.build.classes}" /> + <sysproperty key="ivy.lib.dir" value="${ivy.lib.dir}" /> <sysproperty key="java.io.tmpdir" value="${junit.tmp.dir}" /> <sysproperty key="hadoop.log.dir" value="${test.log.dir}"/> <jvmarg line="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=128M ${debugArgs} -Djava.library.path=${hadoop.root}\bin"/> @@ -980,8 +956,6 @@ <patternset> <includesfile name="@{test.file}"/> <excludesfile name="${test.exclude.file}" if="test.exclude.file"/> - <excludesfile name="${test.exclude.file.20}" unless="isHadoop23"/> - <excludesfile name="${test.exclude.file.23}" if="isHadoop23"/> <excludesfile name="${test.exclude.file.for.exectype}"/> </patternset> <exclude name="**/${exclude.testcase}.java" if="exclude.testcase" /> @@ -990,7 +964,9 @@ </fileset> </batchtest> <batchtest fork="yes" todir="${test.log.dir}" if="testcase"> - <fileset dir="test" includes="**/${testcase}.java"/> + <fileset dir="test" includes="**/${testcase}.java"> + <exclude name="e2e/**/*.java"/> + </fileset> </batchtest> <assertions> @@ -1008,10 +984,10 @@ <target name="test-core-mrtez" description="run core tests on both mr and tez mode" depends="setWindowsPath,setLinuxPath,compile-test,jar,debugger.check,jackson-pig-3039-test-download"> - <fail message="hadoopversion must be set to 23 when invoking test-core-mrtez"> + <fail message="hadoopversion must be set to 2 when invoking test-core-mrtez"> <condition> <not> - <equals arg1="${hadoopversion}" arg2="23" /> + <equals arg1="${hadoopversion}" arg2="2" /> </not> </condition> </fail> @@ -1021,6 +997,9 @@ <propertyreset name="test.exec.type" value="mr" /> <propertyreset name="test.log.dir" value="${test.build.dir}/logs/${test.exec.type}" /> <macro-test-runner test.file="${test.all.file}" tests.failed="test.mr.failed"/> + <delete> + <fileset dir="${build.classes}" includes="*.xml" /> + </delete> <echo /> <echo message="=======================" /> <echo message="Running Tez tests" /> @@ -1099,10 +1078,7 @@ <!-- ================================================================== --> <!-- Distribution --> <!-- ================================================================== --> - <target name="package-h12" depends="jar-h12, docs, api-report, piggybank" description="Create a Pig tar release"> - <package-base/> - </target> - + <target name="package" depends="jar, docs, api-report, piggybank" description="Create a Pig tar release"> <package-base/> </target> @@ -1122,7 +1098,6 @@ <fileset dir="${lib.dir}"/> </copy> - <copy 
file="${output.jarfile.backcompat-core-h1}" tofile="${tar.dist.dir}/${final.name}-core-h1.jar" failonerror="false"/> <copy file="${output.jarfile.backcompat-core-h2}" tofile="${tar.dist.dir}/${final.name}-core-h2.jar" failonerror="false"/> <copy todir="${tar.dist.dir}/lib" file="contrib/piggybank/java/piggybank.jar"/> @@ -1200,10 +1175,6 @@ <tar-base/> </target> - <target name="tar-h12" depends="package-h12" description="Source distribution"> - <tar-base/> - </target> - <macrodef name="tar-base"> <sequential> <tar compression="gzip" longfile="gnu" destfile="${build.dir}/${artifact.pig.tar}"> @@ -1289,15 +1260,13 @@ uri="urn:maven-artifact-ant" classpathref="mvn-ant-task.classpath"/> </target> - <target name="mvn-install" depends="mvn-taskdef,jar-h12, set-version, source-jar, - javadoc-jar, pigunit-jar, smoketests-jar, piggybank" + <target name="mvn-install" depends="mvn-taskdef, mvn-build, set-version" description="To install pig to local filesystem's m2 cache"> <artifact:pom file="${pig.pom}" id="pig"/> - <artifact:install file="${output.jarfile.core-h1}"> + <artifact:install file="${output.jarfile.core-h2}"> <pom refid="pig"/> <attach file="${output.jarfile.sources}" classifier="sources" /> <attach file="${output.jarfile.javadoc}" classifier="javadoc" /> - <attach file="${output.jarfile.core-h2}" classifier="h2" /> </artifact:install> <artifact:pom file="${pigunit.pom}" id="pigunit"/> <artifact:install file="${pigunit.jarfile}"> @@ -1313,10 +1282,9 @@ </artifact:install> </target> - <target name="mvn-build" depends="jar-h12, source-jar, + <target name="mvn-build" depends="jar, source-jar, javadoc-jar, smoketests-jar, pigunit-jar, piggybank" description="To build the pig jar artifacts to be deployed to apache maven repository"> - <move file="${output.jarfile.backcompat-core-h1}" tofile="${output.jarfile.core}"/> <move file="${output.jarfile.backcompat-core-h2}" tofile="${output.jarfile.core-h2}"/> </target> @@ -1338,8 +1306,6 @@ <pom refid="pig"/> <attach file="${output.jarfile.core}.asc" type="jar.asc"/> <attach file="${pig.pom}.asc" type="pom.asc"/> - <attach file="${output.jarfile.core-h2}.asc" type="jar.asc" classifier="h2"/> - <attach file="${output.jarfile.core-h2}" classifier="h2" /> <attach file="${output.jarfile.sources}.asc" type="jar.asc" classifier="sources"/> <attach file="${output.jarfile.sources}" classifier="sources" /> <attach file="${output.jarfile.javadoc}.asc" type="jar.asc" classifier="javadoc"/> @@ -1374,7 +1340,6 @@ <artifact:deploy file="${output.jarfile.core}"> <remoteRepository id="${snapshots_repo_id}" url="${asfsnapshotrepo}"/> <pom refid="pig"/> - <attach file="${output.jarfile.core-h2}" classifier="h2" /> <attach file="${output.jarfile.sources}" classifier="sources" /> <attach file="${output.jarfile.javadoc}" classifier="javadoc" /> </artifact:deploy> @@ -1418,8 +1383,6 @@ </macrodef> <sign-artifact input.file="${output.jarfile.core}" output.file="${output.jarfile.core}.asc" gpg.passphrase="${gpg.passphrase}"/> - <sign-artifact input.file="${output.jarfile.core-h2}" - output.file="${output.jarfile.core-h2}.asc" gpg.passphrase="${gpg.passphrase}"/> <sign-artifact input.file="${output.jarfile.sources}" output.file="${output.jarfile.sources}.asc" gpg.passphrase="${gpg.passphrase}"/> <sign-artifact input.file="${output.jarfile.javadoc}" @@ -1707,7 +1670,9 @@ <target name="ivy-resolve" depends="ivy-init" unless="ivy.resolved" description="Resolve Ivy dependencies"> <property name="ivy.resolved" value="true"/> + <echo>*** Ivy resolve with Hadoop ${hadoopversion} 
and HBase ${hbaseversion} ***</echo> <ivy:resolve log="${loglevel}" settingsRef="${ant.project.name}.ivy.settings" conf="compile"/> + <ivy:report toDir="build/ivy/report"/> </target> <target name="ivy-compile" depends="ivy-resolve" description="Retrieve Ivy-managed artifacts for compile configuration"> Modified: pig/branches/spark/conf/pig.properties URL: http://svn.apache.org/viewvc/pig/branches/spark/conf/pig.properties?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/conf/pig.properties (original) +++ pig/branches/spark/conf/pig.properties Wed Feb 22 09:43:41 2017 @@ -557,6 +557,9 @@ pig.location.check.strict=false # hcat.bin=/usr/local/hcat/bin/hcat +# Enable ATS hook to log the Pig specific ATS entry, disable only when ATS server is not deployed +pig.ats.enabled=true + ########################################################################### # # Overrides for extreme environments @@ -611,13 +614,13 @@ hcat.bin=/usr/local/hcat/bin/hcat # If you want Pig to allow certain errors before failing you can set this property. # If the propery is set to true and the StoreFunc implements ErrorHandling if will allow configurable errors # based on the OutputErrorHandler implementation -# pig.allow.store.errors = false +# pig.error-handling.enabled = false # # Controls the minimum number of errors for store -# pig.errors.min.records = 0 +# pig.error-handling.min.error.records = 0 # # Set the threshold for percentage of errors -# pig.error.threshold.percent = 0.0f +# pig.error-handling.error.threshold = 0.0f ########################################################################### # @@ -675,3 +678,6 @@ hcat.bin=/usr/local/hcat/bin/hcat pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage +# If set, Pig will override tez.am.launch.cmd-opts and tez.am.resource.memory.mb to optimal +# even they are set to a different value. Default value is true. +#pig.tez.configure.am.memory=false Modified: pig/branches/spark/contrib/piggybank/java/build.xml URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/build.xml?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/build.xml (original) +++ pig/branches/spark/contrib/piggybank/java/build.xml Wed Feb 22 09:43:41 2017 @@ -16,13 +16,20 @@ --> <project basedir="." default="jar" name="pigudf"> + + <taskdef resource="net/sf/antcontrib/antcontrib.properties"> + <classpath> + <pathelement location="../../../ivy/ant-contrib-1.0b3.jar"/> + </classpath> + </taskdef> + <property file="../../../build.properties" /> <!-- javac properties --> <property name="javac.debug" value="on" /> <property name="javac.level" value="source,lines,vars"/> <property name="javac.optimize" value="on" /> <property name="javac.deprecation" value="off" /> - <property name="javac.version" value="1.6" /> + <property name="javac.version" value="1.7" /> <property name="javac.args" value="" /> <!-- TODO we should use warning... 
<property name="javac.args.warnings" value="-Xlint:unchecked" /> --> <property name="javac.args.warnings" value="" /> @@ -38,16 +45,22 @@ <property name="src.dir" value="src/main/java/org/apache/pig/piggybank" /> <property name="hsqldb.jar" value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/> - <!-- JobHistoryLoader currently does not support 0.23 --> - <condition property="build.classes.excludes" value="**/HadoopJobHistoryLoader.java" else=""> - <equals arg1="${hadoopversion}" arg2="23"/> - </condition> - <condition property="test.classes.excludes" value="**/TestHadoopJobHistoryLoader.java" else=""> + <!-- + Hadoop master version + (Value 23 is translated for backward compatibility in old build scripts) + --> + <if> <equals arg1="${hadoopversion}" arg2="23"/> - </condition> + <then> + <echo>Property setting hadoopversion=23 is deprecated. Overwriting to hadoopversion=2</echo> + <var name="hadoopversion" unset="true"/> + <property name="hadoopversion" value="2" /> + </then> + </if> + <property name="hadoopversion" value="2" /> - <condition property="hadoopsuffix" value="2" else="1"> - <equals arg1="${hadoopversion}" arg2="23"/> + <condition property="hadoopsuffix" value="2" else=""> + <equals arg1="${hadoopversion}" arg2="2"/> </condition> <!-- jar properties --> Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java Wed Feb 22 09:43:41 2017 @@ -21,6 +21,7 @@ package org.apache.pig.piggybank.evaluat import java.io.IOException; import java.util.Iterator; +import org.apache.pig.Accumulator; import org.apache.pig.Algebraic; import org.apache.pig.EvalFunc; import org.apache.pig.backend.executionengine.ExecException; @@ -43,7 +44,7 @@ import org.apache.pig.impl.logicalLayer. * * @author Vadim Zaliva <l...@codemindes.com> */ -public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic +public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic, Accumulator<Tuple> { /** * Indicates once for how many items progress hartbeat should be sent. @@ -131,6 +132,11 @@ public class MaxTupleBy1stField extends protected static Tuple max(Tuple input, PigProgressable reporter) throws ExecException { DataBag values = (DataBag) input.get(0); + return max(values,reporter); + } + + protected static Tuple max(DataBag values, PigProgressable reporter) throws ExecException + { // if we were handed an empty bag, return NULL // this is in compliance with SQL standard @@ -183,4 +189,44 @@ public class MaxTupleBy1stField extends return Final.class.getName(); } + + /** + * Accumulator implementation + */ + + private Tuple intermediate = null; + + /** + * Accumulate implementation - calls max() on the incoming tuple set including intermediate tuple if already exists + * @param b A tuple containing a single field, which is a bag. 
The bag will contain the set + * @throws IOException + */ + @Override + public void accumulate(Tuple b) throws IOException { + try{ + DataBag values = BagFactory.getInstance().newDefaultBag(); + values.addAll((DataBag) b.get(0)); + + if (intermediate != null) { + values.add(intermediate); + } + intermediate = max(values,reporter); + + }catch (ExecException ee){ + IOException oughtToBeEE = new IOException(); + oughtToBeEE.initCause(ee); + throw oughtToBeEE; + } + } + + @Override + public Tuple getValue() { + return intermediate; + } + + @Override + public void cleanup() { + intermediate = null; + } + } Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java Wed Feb 22 09:43:41 2017 @@ -23,10 +23,13 @@ import java.util.Iterator; import java.util.List; import org.apache.pig.EvalFunc; -import org.apache.pig.FuncSpec; import org.apache.pig.PigException; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.builtin.AVG; +import org.apache.pig.builtin.BigDecimalAvg; +import org.apache.pig.builtin.BigDecimalMax; +import org.apache.pig.builtin.BigDecimalMin; +import org.apache.pig.builtin.BigDecimalSum; import org.apache.pig.builtin.COUNT; import org.apache.pig.builtin.DoubleAvg; import org.apache.pig.builtin.DoubleMax; @@ -54,6 +57,7 @@ import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; /** * Given an aggregate function, a bag, and possibly a window definition, @@ -73,23 +77,27 @@ import org.apache.pig.impl.logicalLayer. * <li>sum(int)</li> * <li>sum(long)</li> * <li>sum(bytearray)</li> + * <li>sum(bigdecimal)</li> * <li>avg(double)</li> * <li>avg(float)</li> * <li>avg(long)</li> * <li>avg(int)</li> * <li>avg(bytearray)</li> + * <li>avg(bigdecimal)</li> * <li>min(double)</li> * <li>min(float)</li> * <li>min(long)</li> * <li>min(int)</li> * <li>min(chararray)</li> * <li>min(bytearray)</li> + * <li>min(bigdecimal)</li> * <li>max(double)</li> * <li>max(float)</li> * <li>max(long)</li> * <li>max(int)</li> * <li>max(chararray)</li> * <li>max(bytearray)</li> + * <li>max(bigdecimal)</li> * <li>row_number</li> * <li>first_value</li> * <li>last_value</li> @@ -153,7 +161,8 @@ import org.apache.pig.impl.logicalLayer. * current row and 3 following) over T;</tt> * * <p>Over accepts a constructor argument specifying the name and type, - * colon-separated, of its return schema.</p> + * colon-separated, of its return schema. 
If the argument option is 'true' use the inner-search, + * take the name and type of bag and return a schema with alias+'_over' and the same type</p> * * <p><pre> * DEFINE IOver org.apache.pig.piggybank.evaluation.Over('state_rk:int'); @@ -188,12 +197,14 @@ public class Over extends EvalFunc<DataB private Object[] udfArgs; private byte returnType; private String returnName; + private boolean searchInnerType; public Over() { initialized = false; udfArgs = null; func = null; returnType = DataType.UNKNOWN; + searchInnerType = false; } public Over(String typespec) { @@ -202,12 +213,16 @@ public class Over extends EvalFunc<DataB String[] fn_tn = typespec.split(":", 2); this.returnName = fn_tn[0]; this.returnType = DataType.findTypeByName(fn_tn[1]); - } else { + } else if(Boolean.parseBoolean(typespec)) { + searchInnerType = Boolean.parseBoolean(typespec); + }else{ this.returnName = "result"; this.returnType = DataType.findTypeByName(typespec); - } + } } + + @Override public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() < 2) { @@ -255,19 +270,42 @@ public class Over extends EvalFunc<DataB @Override public Schema outputSchema(Schema inputSch) { try { - if (returnType == DataType.UNKNOWN) { + FieldSchema field; + + if (searchInnerType) { + field = new FieldSchema(inputSch.getField(0)); + while (searchInnerType) { + if (field.schema != null + && field.schema.getFields().size() > 1) { + searchInnerType = false; + } else { + if (field.type == DataType.TUPLE + || field.type == DataType.BAG) { + field = new FieldSchema(field.schema.getField(0)); + } else { + field.alias = field.alias + "_over"; + searchInnerType = false; + } + } + } + + searchInnerType = true; + } else if (returnType == DataType.UNKNOWN) { return Schema.generateNestedSchema(DataType.BAG, DataType.NULL); } else { - Schema outputTupleSchema = new Schema(new Schema.FieldSchema(returnName, returnType)); - return new Schema(new Schema.FieldSchema( - getSchemaName(this.getClass().getName().toLowerCase(), inputSch), - outputTupleSchema, - DataType.BAG)); + field = new Schema.FieldSchema(returnName, returnType); } + + Schema outputTupleSchema = new Schema(field); + return new Schema(new Schema.FieldSchema(getSchemaName(this + .getClass().getName().toLowerCase(), inputSch), + outputTupleSchema, DataType.BAG)); + } catch (FrontendException fe) { throw new RuntimeException("Unable to create nested schema", fe); } } + private void init(Tuple input) throws IOException { initialized = true; @@ -329,6 +367,8 @@ public class Over extends EvalFunc<DataB func = new LongSum(); } else if ("sum(bytearray)".equalsIgnoreCase(agg)) { func = new SUM(); + } else if ("sum(bigdecimal)".equalsIgnoreCase(agg)) { + func = new BigDecimalSum(); } else if ("avg(double)".equalsIgnoreCase(agg)) { func = new DoubleAvg(); } else if ("avg(float)".equalsIgnoreCase(agg)) { @@ -339,6 +379,8 @@ public class Over extends EvalFunc<DataB func = new IntAvg(); } else if ("avg(bytearray)".equalsIgnoreCase(agg)) { func = new AVG(); + } else if ("avg(bigdecimal)".equalsIgnoreCase(agg)) { + func = new BigDecimalAvg(); } else if ("min(double)".equalsIgnoreCase(agg)) { func = new DoubleMin(); } else if ("min(float)".equalsIgnoreCase(agg)) { @@ -351,6 +393,8 @@ public class Over extends EvalFunc<DataB func = new StringMin(); } else if ("min(bytearray)".equalsIgnoreCase(agg)) { func = new MIN(); + } else if ("min(bigdecimal)".equalsIgnoreCase(agg)) { + func = new BigDecimalMin(); } else if ("max(double)".equalsIgnoreCase(agg)) { func = new DoubleMax(); } 
else if ("max(float)".equalsIgnoreCase(agg)) { @@ -363,6 +407,8 @@ public class Over extends EvalFunc<DataB func = new StringMax(); } else if ("max(bytearray)".equalsIgnoreCase(agg)) { func = new MAX(); + } else if ("max(bigdecimal)".equalsIgnoreCase(agg)) { + func = new BigDecimalMax(); } else if ("row_number".equalsIgnoreCase(agg)) { func = new RowNumber(); } else if ("first_value".equalsIgnoreCase(agg)) { Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java Wed Feb 22 09:43:41 2017 @@ -363,6 +363,15 @@ public class SearchEngineExtractor exten searchEngines.put("search.lycos.com", "Lycos"); searchEngines.put("search.msn.co.uk", "MSN UK"); searchEngines.put("search.msn.com", "MSN"); + searchEngines.put("bing.com", "Bing"); + searchEngines.put("ssl.bing.com", "Bing"); + searchEngines.put("cn.bing.com", "Bing China"); + searchEngines.put("br.bing.com", "Bing Brazil"); + searchEngines.put("it.bing.com", "Bing Italy"); + searchEngines.put("be.bing.com", "Bing Netherlands"); + searchEngines.put("uk.bing.com", "Bing UK"); + searchEngines.put("hk.bing.com", "Bing Hong Kong"); + searchEngines.put("nz.bing.com", "Bing New Zeland"); searchEngines.put("search.myway.com", "MyWay"); searchEngines.put("search.mywebsearch.com", "My Web Search"); searchEngines.put("search.ntlworld.com", "NTLWorld"); Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java Wed Feb 22 09:43:41 2017 @@ -16,8 +16,11 @@ package org.apache.pig.piggybank.evaluat import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; +import javax.xml.XMLConstants; +import javax.xml.namespace.NamespaceContext; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPathFactory; @@ -49,8 +52,7 @@ public class XPath extends EvalFunc<Stri private static boolean cache = true; private static boolean ignoreNamespace = true; - public static final String EMPTY_STRING = ""; - + /** * input should contain: 1) xml 2) xpath * 3) optional cache xml doc flag @@ -95,8 +97,13 @@ public class XPath extends EvalFunc<Stri return null; } - if(input.size() > 2) + if(input.size() > 2) { cache = (Boolean) input.get(2); + } + + if (input.size() > 3) { + ignoreNamespace = (Boolean) input.get(3); + } if 
(!cache || xpath == null || !xml.equals(this.xml)) { final InputSource source = new InputSource(new StringReader(xml)); @@ -104,6 +111,7 @@ public class XPath extends EvalFunc<Stri this.xml = xml; // track the xml for subsequent calls to this udf final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setNamespaceAware(!ignoreNamespace); final DocumentBuilder db = dbf.newDocumentBuilder(); this.document = db.parse(source); @@ -112,14 +120,32 @@ public class XPath extends EvalFunc<Stri this.xpath = xpathFactory.newXPath(); + if (!ignoreNamespace) { + xpath.setNamespaceContext(new NamespaceContext() { + @Override + public String getNamespaceURI(String prefix) { + if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) { + return document.lookupNamespaceURI(null); + } else { + return document.lookupNamespaceURI(prefix); + } + } + + @Override + public String getPrefix(String namespaceURI) { + return document.lookupPrefix(namespaceURI); + } + + @Override + public Iterator getPrefixes(String namespaceURI) { + return null; + } + }); + } } String xpathString = (String) input.get(1); - if (ignoreNamespace) { - xpathString = createNameSpaceIgnoreXpathString(xpathString); - } - final String value = xpath.evaluate(xpathString, document); return value; @@ -165,34 +191,6 @@ public class XPath extends EvalFunc<Stri } return true; } - - - /** - * Returns a new the xPathString by adding additional parameters - * in the existing xPathString for ignoring the namespace during compilation. - * - * @param String xpathString - * @return String modified xpathString - */ - private String createNameSpaceIgnoreXpathString(final String xpathString) { - final String QUERY_PREFIX = "//*"; - final String LOCAL_PREFIX = "[local-name()='"; - final String LOCAL_POSTFIX = "']"; - final String SPLITTER = "/"; - - try { - String xpathStringWithLocalName = EMPTY_STRING; - String[] individualNodes = xpathString.split(SPLITTER); - - for (String node : individualNodes) { - xpathStringWithLocalName = xpathStringWithLocalName.concat(QUERY_PREFIX + LOCAL_PREFIX + node - + LOCAL_POSTFIX); - } - return xpathStringWithLocalName; - } catch (Exception ex) { - return xpathString; - } - } /** * Returns argument schemas of the UDF. Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Wed Feb 22 09:43:41 2017 @@ -580,7 +580,7 @@ public class CSVExcelStorage extends Pig } } else if (b == DOUBLE_QUOTE) { // Does a double quote immediately follow? 
- if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE)) { + if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE) && (fieldBuffer.position() != 0)) { fieldBuffer.put(b); nextTupleSkipChar = true; continue; Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java Wed Feb 22 09:43:41 2017 @@ -91,6 +91,7 @@ public class DBStorage extends StoreFunc /** * Write the tuple to Database directly here. */ + @Override public void putNext(Tuple tuple) throws IOException { int sqlPos = 1; try { @@ -373,4 +374,9 @@ public class DBStorage extends StoreFunc p.setProperty(SCHEMA_SIGNATURE, s.toString()); } + @Override + public Boolean supportsParallelWriteToStoreLocation() { + return false; + } + } Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java Wed Feb 22 09:43:41 2017 @@ -60,7 +60,6 @@ import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.util.StorageUtil; import org.apache.pig.data.DataType; import org.apache.pig.data.DataByteArray; -import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims; /** * <code>IndexedStorage</code> is a form of <code>PigStorage</code> that supports a Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java Wed Feb 22 09:43:41 2017 @@ -16,7 +16,9 @@ import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.io.IOException; import java.text.NumberFormat; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -42,6 +44,9 @@ import org.apache.pig.backend.executione import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; import org.apache.pig.data.Tuple; import org.apache.pig.impl.util.StorageUtil; +import org.apache.xml.utils.StringBufferPool; + +import com.google.common.base.Strings; /** * The UDF is useful for splitting the output data into a bunch of directories @@ 
-73,13 +78,21 @@ import org.apache.pig.impl.util.StorageU * If the output is compressed,then the sub directories and the output files will * be having the extension. Say for example in the above case if bz2 is used one file * will look like ;/my/home/output.bz2/a1.bz2/a1-0000.bz2 + * + * Key field can also be a comma separated list of indices e.g. '0,1' - in this case + * storage will be multi-level: + * /my/home/output/a1/b1/a1-b1-0000 + * /my/home/output/a1/b2/a1-b2-0000 + * There is also an option to leave key values out of storage, see isRemoveKeys. */ public class MultiStorage extends StoreFunc { + private static final String KEYFIELD_DELIMETER = ","; private Path outputPath; // User specified output Path - private int splitFieldIndex = -1; // Index of the key field + private final List<Integer> splitFieldIndices= new ArrayList<Integer>(); // Indices of the key fields private String fieldDel; // delimiter of the output record. private Compression comp; // Compression type of output data. + private boolean isRemoveKeys = false; // Compression types supported by this store enum Compression { @@ -95,9 +108,14 @@ public class MultiStorage extends StoreF this(parentPathStr, splitFieldIndex, compression, "\\t"); } + public MultiStorage(String parentPathStr, String splitFieldIndex, + String compression, String fieldDel) { + this(parentPathStr, splitFieldIndex, compression, fieldDel, "false"); + } + /** * Constructor - * + * * @param parentPathStr * Parent output dir path (this will be specified in store statement, * so MultiStorage don't use this parameter in reality. However, we don't @@ -108,18 +126,26 @@ public class MultiStorage extends StoreF * 'bz2', 'bz', 'gz' or 'none' * @param fieldDel * Output record field delimiter. + * @param isRemoveKeys + * Removes key columns from result during write. */ public MultiStorage(String parentPathStr, String splitFieldIndex, - String compression, String fieldDel) { + String compression, String fieldDel, String isRemoveKeys) { + this.isRemoveKeys = Boolean.parseBoolean(isRemoveKeys); this.outputPath = new Path(parentPathStr); - this.splitFieldIndex = Integer.parseInt(splitFieldIndex); + + String[] splitFieldIndices = splitFieldIndex.split(KEYFIELD_DELIMETER); + for (String splitFieldIndexString : splitFieldIndices){ + this.splitFieldIndices.add(Integer.parseInt(splitFieldIndexString)); + } + this.fieldDel = fieldDel; try { this.comp = (compression == null) ? Compression.none : Compression - .valueOf(compression.toLowerCase()); + .valueOf(compression.toLowerCase()); } catch (IllegalArgumentException e) { System.err.println("Exception when converting compression string: " - + compression + " to enum. No compression will be used"); + + compression + " to enum. 
No compression will be used"); this.comp = Compression.none; } } @@ -127,22 +153,26 @@ public class MultiStorage extends StoreF //-------------------------------------------------------------------------- // Implementation of StoreFunc - private RecordWriter<String, Tuple> writer; + private RecordWriter<List<String>, Tuple> writer; @Override public void putNext(Tuple tuple) throws IOException { - if (tuple.size() <= splitFieldIndex) { - throw new IOException("split field index:" + this.splitFieldIndex - + " >= tuple size:" + tuple.size()); + for (int splitFieldIndex : this.splitFieldIndices) { + if (tuple.size() <= splitFieldIndex) { + throw new IOException("split field index:" + splitFieldIndex + + " >= tuple size:" + tuple.size()); + } } - Object field = null; - try { - field = tuple.get(splitFieldIndex); - } catch (ExecException exec) { - throw new IOException(exec); + List<String> fields = new ArrayList<String>(); + for (int splitFieldIndex : this.splitFieldIndices){ + try { + fields.add(String.valueOf(tuple.get(splitFieldIndex))); + } catch (ExecException exec) { + throw new IOException(exec); + } } try { - writer.write(String.valueOf(field), tuple); + writer.write(fields, tuple); } catch (InterruptedException e) { throw new IOException(e); } @@ -153,6 +183,9 @@ public class MultiStorage extends StoreF public OutputFormat getOutputFormat() throws IOException { MultiStorageOutputFormat format = new MultiStorageOutputFormat(); format.setKeyValueSeparator(fieldDel); + if (this.isRemoveKeys){ + format.setSkipIndices(this.splitFieldIndices); + } return format; } @@ -174,27 +207,33 @@ public class MultiStorage extends StoreF FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } } - + + @Override + public Boolean supportsParallelWriteToStoreLocation() { + return false; + } + //-------------------------------------------------------------------------- // Implementation of OutputFormat public static class MultiStorageOutputFormat extends - TextOutputFormat<String, Tuple> { + TextOutputFormat<List<String>, Tuple> { private String keyValueSeparator = "\\t"; private byte fieldDel = '\t'; - + private List<Integer> skipIndices = null; + @Override - public RecordWriter<String, Tuple> + public RecordWriter<List<String>, Tuple> getRecordWriter(TaskAttemptContext context ) throws IOException, InterruptedException { final TaskAttemptContext ctx = context; - return new RecordWriter<String, Tuple>() { + return new RecordWriter<List<String>, Tuple>() { - private Map<String, MyLineRecordWriter> storeMap = - new HashMap<String, MyLineRecordWriter>(); + private Map<List<String>, MyLineRecordWriter> storeMap = + new HashMap<List<String>, MyLineRecordWriter>(); private static final int BUFFER_SIZE = 1024; @@ -202,7 +241,7 @@ public class MultiStorage extends StoreF new ByteArrayOutputStream(BUFFER_SIZE); @Override - public void write(String key, Tuple val) throws IOException { + public void write(List<String> key, Tuple val) throws IOException { int sz = val.size(); for (int i = 0; i < sz; i++) { Object field; @@ -212,9 +251,13 @@ public class MultiStorage extends StoreF throw ee; } - StorageUtil.putField(mOut, field); + boolean skipCurrentField = skipIndices != null && skipIndices.contains(i); - if (i != sz - 1) { + if (!skipCurrentField) { + StorageUtil.putField(mOut, field); + } + + if (i != sz - 1 && !skipCurrentField) { mOut.write(fieldDel); } } @@ -231,17 +274,17 @@ public class MultiStorage extends StoreF } } - private MyLineRecordWriter getStore(String fieldValue) throws IOException { - 
MyLineRecordWriter store = storeMap.get(fieldValue); + private MyLineRecordWriter getStore(List<String> fieldValues) throws IOException { + MyLineRecordWriter store = storeMap.get(fieldValues); if (store == null) { - DataOutputStream os = createOutputStream(fieldValue); + DataOutputStream os = createOutputStream(fieldValues); store = new MyLineRecordWriter(os, keyValueSeparator); - storeMap.put(fieldValue, store); + storeMap.put(fieldValues, store); } return store; } - private DataOutputStream createOutputStream(String fieldValue) throws IOException { + private DataOutputStream createOutputStream(List<String> fieldValues) throws IOException { Configuration conf = ctx.getConfiguration(); TaskID taskId = ctx.getTaskAttemptID().getTaskID(); @@ -259,7 +302,21 @@ public class MultiStorage extends StoreF NumberFormat nf = NumberFormat.getInstance(); nf.setMinimumIntegerDigits(4); - Path path = new Path(fieldValue+extension, fieldValue + '-' + StringBuffer pathStringBuffer = new StringBuffer(); + for (String fieldValue : fieldValues){ + String safeFieldValue = fieldValue.replaceAll("\\/","-"); + pathStringBuffer.append(safeFieldValue); + pathStringBuffer.append("/"); + } + pathStringBuffer.deleteCharAt(pathStringBuffer.length()-1); + String pathString = pathStringBuffer.toString(); + String idString = pathString.replaceAll("\\/","-"); + + if (!Strings.isNullOrEmpty(extension)){ + pathString = pathString.replaceAll("\\/",extension+"\\/"); + } + + Path path = new Path(pathString+extension, idString + '-' + nf.format(taskId.getId())+extension); Path workOutputPath = ((FileOutputCommitter)getOutputCommitter(ctx)).getWorkPath(); Path file = new Path(workOutputPath, path); @@ -279,8 +336,12 @@ public class MultiStorage extends StoreF keyValueSeparator = sep; fieldDel = StorageUtil.parseFieldDel(keyValueSeparator); } - - //------------------------------------------------------------------------ + + public void setSkipIndices(List<Integer> skipIndices) { + this.skipIndices = skipIndices; + } + + //------------------------------------------------------------------------ // protected static class MyLineRecordWriter Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java Wed Feb 22 09:43:41 2017 @@ -18,12 +18,11 @@ package org.apache.pig.piggybank.evaluation; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import java.math.BigDecimal; import java.util.Iterator; -import java.util.List; import java.util.Random; import org.apache.pig.backend.executionengine.ExecException; @@ -34,8 +33,6 @@ import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.logicalLayer.schema.Schema; - -import org.junit.Before; import org.junit.Test; public class TestOver { @@ -66,11 +63,25 @@ public class TestOver { out = func.outputSchema(in); 
assertEquals("{org.apache.pig.piggybank.evaluation.over_3: {result: double}}", out.toString()); + // bigdecimal + func = new Over("BIGDECIMAL"); + in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER); + out = func.outputSchema(in); + assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {result: bigdecimal}}", out.toString()); + // named func = new Over("bob:chararray"); in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER); out = func.outputSchema(in); - assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {bob: chararray}}", out.toString()); + assertEquals("{org.apache.pig.piggybank.evaluation.over_5: {bob: chararray}}", out.toString()); + + + // Search inner alias and type + func = new Over("true"); + in = Schema.generateNestedSchema(DataType.BAG, DataType.BIGDECIMAL); + in.getField(0).schema.getField(0).alias="test"; + out = func.outputSchema(in); + assertEquals("{org.apache.pig.piggybank.evaluation.over_6: {test_over: bigdecimal}}", out.toString()); } @Test @@ -397,6 +408,28 @@ public class TestOver { assertEquals(new Long(10), to.get(0)); } } + + @Test + public void testSumBigDecimal() throws Exception { + Over func = new Over(); + DataBag inbag = BagFactory.getInstance().newDefaultBag(); + for (int i = 0; i < 10; i++) { + Tuple t = TupleFactory.getInstance().newTuple(1); + t.set(0, new BigDecimal(1)); + inbag.add(t); + } + Tuple t = TupleFactory.getInstance().newTuple(4); + t.set(0, inbag); + t.set(1, "sum(bigdecimal)"); + t.set(2, -1); + t.set(3, -1); + DataBag outbag = func.exec(t); + assertEquals(10, outbag.size()); + for (Tuple to : outbag) { + assertEquals(1, to.size()); + assertEquals(new BigDecimal(10), to.get(0)); + } + } @Test public void testAvgDouble() throws Exception { @@ -509,6 +542,29 @@ public class TestOver { } @Test + public void testAvgBigDecimal() throws Exception { + Over func = new Over(); + DataBag inbag = BagFactory.getInstance().newDefaultBag(); + for (int i = 0; i < 10; i++) { + Tuple t = TupleFactory.getInstance().newTuple(1); + t.set(0, new BigDecimal(i)); + inbag.add(t); + } + Tuple t = TupleFactory.getInstance().newTuple(4); + t.set(0, inbag); + t.set(1, "avg(bigdecimal)"); + t.set(2, -1); + t.set(3, -1); + DataBag outbag = func.exec(t); + assertEquals(10, outbag.size()); + for (Tuple to : outbag) { + assertEquals(1, to.size()); + assertEquals(new BigDecimal(4.5), to.get(0)); + } + } + + + @Test public void testMinDouble() throws Exception { Over func = new Over(); DataBag inbag = BagFactory.getInstance().newDefaultBag(); @@ -627,6 +683,26 @@ public class TestOver { assertEquals("0", to.get(0)); } } + + @Test + public void testMinBigDecimal() throws Exception { + Over func = new Over(); + DataBag inbag = BagFactory.getInstance().newDefaultBag(); + for (int i = 0; i < 10; i++) { + Tuple t = TupleFactory.getInstance().newTuple(1); + t.set(0, new BigDecimal(i)); + inbag.add(t); + } + Tuple t = TupleFactory.getInstance().newTuple(2); + t.set(0, inbag); + t.set(1, "min(bigdecimal)"); + DataBag outbag = func.exec(t); + assertEquals(10, outbag.size()); + for (Tuple to : outbag) { + assertEquals(1, to.size()); + assertEquals(new BigDecimal(0), to.get(0)); + } + } @Test public void testMaxDouble() throws Exception { @@ -754,6 +830,28 @@ public class TestOver { assertEquals("9", to.get(0)); } } + + @Test + public void testMaxBigDecimal() throws Exception { + Over func = new Over(); + DataBag inbag = BagFactory.getInstance().newDefaultBag(); + for (int i = 0; i < 10; i++) { + Tuple t = TupleFactory.getInstance().newTuple(1); + 
t.set(0, new BigDecimal(i)); + inbag.add(t); + } + Tuple t = TupleFactory.getInstance().newTuple(2); + t.set(0, inbag); + t.set(1, "max(bigdecimal)"); + DataBag outbag = func.exec(t); + assertEquals(10, outbag.size()); + int count = 0; + for (Tuple to : outbag) { + assertEquals(1, to.size()); + assertEquals(new BigDecimal(count++), to.get(0)); + } + } + @Test public void testRowNumber() throws Exception { Added: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java?rev=1783988&view=auto ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java (added) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java Wed Feb 22 09:43:41 2017 @@ -0,0 +1,95 @@ +package org.apache.pig.piggybank.test.evaluation; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import com.google.common.collect.Lists; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.piggybank.evaluation.MaxTupleBy1stField; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + +public class TestMaxTupleBy1stField { + + private static List<Tuple> inputTuples = new ArrayList<>(); + private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance(); + private static final BagFactory BAG_FACTORY = BagFactory.getInstance(); + + @BeforeClass + public static void setup() throws Exception { + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(0L, "Fruit", "orange", 21F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(1L, "Fruit", "apple", 9.9F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(2L, "Vegetable", "paprika", 30F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(3L, "Fruit", "blueberry", 40F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(4L, "Vegetable", "carrot", 50F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(5L, "Fruit", "blueberry", 41F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(6L, "Vegetable", "paprika", 31F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(7L, "Fruit", "orange", 20.5F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(8L, "Fruit", "apple", 10.1F))); + inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(9L, "Fruit", "apple", 10.2F))); + } + + + @Test + public void testExecFunc() throws Exception { + MaxTupleBy1stField udf = new MaxTupleBy1stField(); + Tuple inputTuple = createTupleFromInputList(0,inputTuples.size()); + + Tuple result = udf.exec(inputTuple); + Assert.assertEquals("apple", result.get(2)); + Assert.assertEquals(10.2F, (Float) result.get(3), 1E-8); + } + + @Test + public void testAccumulator() throws Exception { + MaxTupleBy1stField udf = new MaxTupleBy1stField(); + + Tuple inputTuple = createTupleFromInputList(0, 3); + udf.accumulate(inputTuple); + Tuple result = udf.getValue(); + Assert.assertEquals("paprika", result.get(2)); + Assert.assertEquals(30F, (Float) result.get(3), 1E-6); + + inputTuple = createTupleFromInputList(3, 6); + udf.accumulate(inputTuple); + result = udf.getValue(); + Assert.assertEquals("apple", result.get(2)); + Assert.assertEquals(10.1F, (Float) result.get(3), 1E-6); + + udf.cleanup(); + Assert.assertEquals(null,udf.getValue()); + } + + private static Tuple createTupleFromInputList(int offset, int length) { + DataBag inputBag = BAG_FACTORY.newDefaultBag(); + for (int i = offset; i < offset+length; ++i) { + inputBag.add(inputTuples.get(i)); + } + Tuple inputTuple = TUPLE_FACTORY.newTuple(); + inputTuple.append(inputBag); + return inputTuple; + } + +}
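For context on the UDF under test: MaxTupleBy1stField takes a bag of tuples and returns the tuple whose first field is largest. A minimal, hedged sketch of the same exec() call outside JUnit — the class and factory APIs are the ones used in the test above, while the sample data here is made up for illustration:

    import org.apache.pig.data.BagFactory;
    import org.apache.pig.data.DataBag;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;
    import org.apache.pig.piggybank.evaluation.MaxTupleBy1stField;

    public class MaxTupleSketch {
        public static void main(String[] args) throws Exception {
            TupleFactory tf = TupleFactory.getInstance();
            DataBag bag = BagFactory.getInstance().newDefaultBag();
            // First field is the comparison key; the rest is payload.
            bag.add(tf.newTuple(java.util.Arrays.asList((Object) 1L, "low")));
            bag.add(tf.newTuple(java.util.Arrays.asList((Object) 7L, "high")));
            Tuple input = tf.newTuple();
            input.append(bag);  // the UDF expects a single bag-valued field
            Tuple max = new MaxTupleBy1stField().exec(input);
            System.out.println(max.get(1));  // prints "high"
        }
    }

The accumulator test above exercises the same comparison incrementally: accumulate() can be fed the bag in chunks, getValue() reports the running maximum, and cleanup() resets the state to null.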
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java Wed Feb 22 09:43:41 2017 @@ -151,6 +151,95 @@ public class XPathTest { } @Test + public void testExecTupleWithDontIgnoreNamespace() throws Exception { + + final XPath xpath = new XPath(); + + final Tuple tuple = mock(Tuple.class); + + when(tuple.get(0)).thenReturn("<?xml version='1.0'?>\n" + + "<foo:document xmlns:foo=\"http://apache.org/foo\" xmlns:bar=\"http://apache.org/bar\">" + + "<bar:element>MyBar</bar:element>" + + "</foo:document>"); + + when(tuple.size()).thenReturn(4); + when(tuple.get(2)).thenReturn(true); + when(tuple.get(3)).thenReturn(false); + + when(tuple.get(1)).thenReturn("/foo:document/bar:element"); + assertEquals("MyBar", xpath.exec(tuple)); + + } + + + @Test public void testExecTupleWithElementNodeWithComplexNameSpace() throws Exception { final XPath xpath = new XPath(); @@ -210,7 +299,31 @@ public class XPathTest { assertEquals("4 stars3.5 stars4 stars4.2 stars3.5 stars", xpath.exec(tuple)); } - + + @Test + public void testFunctionInXPath() throws Exception { + + final XPath xpath = new XPath(); + + final Tuple tuple = mock(Tuple.class); + + when(tuple.get(0)).thenReturn("<Aa name=\"test1\">" + + "<Bb Cc=\"1\"/>" + + "<Bb Cc=\"1\"/>" + + "<Bb Cc=\"1\"/>" + + "<Bb Cc=\"1\"/>" + + "<Dd>test2</Dd>" + + "</Aa>"); + + when(tuple.size()).thenReturn(4); + when(tuple.get(1)).thenReturn("sum(Aa/Bb/@Cc)"); + when(tuple.get(2)).thenReturn(true); + when(tuple.get(3)).thenReturn(true); + + assertEquals("4", xpath.exec(tuple)); + + } + @Ignore //--optional test @Test public void testCacheBenefit() throws Exception{
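The two tests added above drive the piggybank XPath UDF through its four-argument form. As a hedged, standalone sketch of the same call path with a real Tuple instead of a mock — the argument positions are inferred from the tests, so treat the meaning of fields 2 and 3 (caching and ignore-namespace flags, respectively) as an assumption:

    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;
    import org.apache.pig.piggybank.evaluation.xml.XPath;

    public class XPathSketch {
        public static void main(String[] args) throws Exception {
            Tuple t = TupleFactory.getInstance().newTuple(4);
            t.set(0, "<foo:document xmlns:foo=\"http://apache.org/foo\""
                    + " xmlns:bar=\"http://apache.org/bar\">"
                    + "<bar:element>MyBar</bar:element></foo:document>");  // input XML
            t.set(1, "/foo:document/bar:element");  // XPath expression
            t.set(2, true);   // assumed: cache the parsed document
            t.set(3, false);  // assumed: do NOT ignore namespaces
            System.out.println(new XPath().exec(t));  // expected: MyBar
        }
    }

With field 3 set to false, the namespace prefixes in the expression stay significant, which is exactly what testExecTupleWithDontIgnoreNamespace asserts; testFunctionInXPath additionally shows that XPath functions such as sum() are evaluated rather than treated as node paths.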
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java Wed Feb 22 09:43:41 2017 @@ -218,7 +218,7 @@ public class TestCSVExcelStorage { Util.registerMultiLineQuery(pig, script); Iterator<Tuple> it = pig.openIterator("a"); Assert.assertEquals(Util.createTuple(new String[] {"foo,\"bar\",baz"}), it.next()); - Assert.assertEquals(Util.createTuple(new String[] {"\"\"\"\""}), it.next()); + Assert.assertEquals(Util.createTuple(new String[] {"\"\"\""}), it.next()); } // Handle newlines in fields Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java?rev=1783988&r1=1783987&r2=1783988&view=diff ============================================================================== --- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java (original) +++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java Wed Feb 22 09:43:41 2017 @@ -109,7 +109,7 @@ public class TestLogFormatLoader { Tuple actual = out.get(0); Tuple expected = tuple( "2001:980:91c0:1:8d31:a232:25e5:85d", "05/Sep/2010:11:27:50 +0200", "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066", map( "promo" , "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",