Hi Nebojsa, You're absolutely right. CDH4.x compiles everything against hadoop-2.0.x, so HadoopJobHistoryLoader is excluded. Thank you very much for pointing that out.
This is a packaging bug as I see it, and I am going to get it fixed in the next release. In the meantime, could you apply the patch that I added at the end and build piggybank.jar from the source tarball yourself? 1) wget http://archive.cloudera.com/cdh4/cdh/4/pig-0.9.2-cdh4.0.1.tar.gz 2) tar -xf pig-0.9.2-cdh4.0.1.tar.gz 3) cd pig-0.9.2-cdh4.0.1 4) patch -p0 -i <this patch> 5) ant clean compile-test jar-withouthadoop -Dhadoopversion=23 6) cd contrib/piggybank/java 7) ant clean jar -Dhadoopversion=20 -Dmr1.test=mr1 Now you will find piggybank.jar built in the current directory, and it contains HadoopJobHistoryLoader as follows: 8) jar -tvf piggybank.jar | grep HadoopJobHistoryLoader 1866 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$1.class 1885 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryInputFormat.class 5769 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryReader.class 943 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobHistoryPathFilter.class 3460 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobKeys.class 2681 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobXMLHandler.class 751 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$MRJobInfo.class 16364 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader.class You can also run the unit test as follows: 9) ant clean test -Dhadoopversion=20 -Dmr1.test=mr1 -Dtestcase=TestHadoopJobHistoryLoader Please let me know if this works for you. Thanks! Cheolsoo diff --git contrib/piggybank/java/build.xml contrib/piggybank/java/build.xml index b162dbd..1616e38 100755 --- contrib/piggybank/java/build.xml +++ contrib/piggybank/java/build.xml @@ -15,7 +15,15 @@ limitations under the License. 
--> -<project basedir="." default="jar" name="pigudf"> +<project basedir="." default="jar" name="pigudf" + xmlns:artifact="urn:maven-artifact-ant" + xmlns:ivy="antlib:org.apache.ivy.ant"> + <taskdef resource="net/sf/antcontrib/antcontrib.properties"> + <classpath> + <pathelement location="../../../cloudera/maven-packaging/lib/ant-contrib-1.0b3.jar"/> + </classpath> + </taskdef> + <!-- javac properties --> <property name="javac.debug" value="on" /> <property name="javac.level" value="source,lines,vars"/> @@ -39,6 +47,17 @@ <property name="hsqldb.jar" value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/> <property name="ivy.lib.dir" value="../../../build/ivy/lib/Pig"/> + <property name="src.shims.dir" value="../../../shims/src/hadoop${hadoopversion}" /> + <if> + <equals arg1="${mr1.test}" arg2="mr1"/> + <then> + <property name="src.shims.test.dir" value="../../../shims/test/hadoop20" /> + </then> + <else> + <property name="src.shims.test.dir" value="../../../shims/test/hadoop${hadoopversion}" /> + </else> + </if> + <!-- JobHistoryLoader currently does not support 0.23 --> <condition property="build.classes.excludes" value="**/HadoopJobHistoryLoader.java" else=""> <equals arg1="${hadoopversion}" arg2="23"/> @@ -59,14 +78,99 @@ <property name="test.src.dir" value="src/test/java" /> <property name="junit.hadoop.conf" value="${user.home}/pigtest/conf/"/> - <path id="pigudf.classpath"> - <pathelement location="${build.classes}"/> - <pathelement location="${pigjar-withouthadoop}"/> - <pathelement location="${pigtest}"/> - <fileset dir="../../../build/ivy/lib"> - <include name="**/*.jar"/> - </fileset> - </path> + <property name="ivy.dir" location="../../../ivy" /> + <property name="build.ivy.dir" location="${build.dir}/ivy" /> + <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" /> + <property name="ivy.lib.dir" location="${build.ivy.lib.dir}/${ ant.project.name}"/> + <property name="build.ivy.report.dir" location="${build.ivy.dir}/report" /> + <property 
name="build.ivy.maven.dir" location="${build.ivy.dir}/maven" /> + <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/pig-${version}.pom" /> + <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/pig-${version}-core.jar" /> + + <loadproperties srcfile="${ivy.dir}/libraries.properties"/> + <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml" /> + <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/> + <property name="mvnrepo" value="http://repo2.maven.org/maven2"/> + <property name="ivy_repo_url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/> + + <target name="ivy-init-dirs"> + <mkdir dir="${build.ivy.dir}" /> + <mkdir dir="${build.ivy.lib.dir}" /> + <mkdir dir="${build.ivy.report.dir}" /> + <mkdir dir="${build.ivy.maven.dir}" /> + <copy todir="${basedir}/" file="../../../ivy.xml" /> + </target> + + <target name="ivy-probe-antlib" > + <condition property="ivy.found"> + <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/> + </condition> + </target> + + <target name="ivy-download" description="To download ivy" unless="offline"> + <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/> + </target> + + <!-- + To avoid Ivy leaking things across big projects, always load Ivy in the same classloader. + Also note how we skip loading Ivy if it is already there, just to make sure all is well. 
+ --> + <target name="ivy-init-antlib" depends="ivy-download,ivy-init-dirs,ivy-probe-antlib" unless="ivy.found"> + <typedef uri="antlib:org.apache.ivy.ant" onerror="fail" loaderRef="ivyLoader"> + <classpath> + <pathelement location="${ivy.jar}"/> + </classpath> + </typedef> + <fail> + <condition > + <not> + <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/> + </not> + </condition> + You need Apache Ivy 2.0 or later from http://ant.apache.org/ + It could not be loaded from ${ivy_repo_url} + </fail> + </target> + + <target name="ivy-init" depends="ivy-init-antlib" > + <ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}" override='false'/> + </target> + + <target name="ivy-mr1-test" depends="ivy-init" description="Resolve, Retrieve Ivy-managed artifacts for test configuration" if="mr1.test"> + <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="test"/> + <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings" + pattern="${build.ivy.lib.dir}/mr1/${ivy.artifact.retrieve.pattern}" conf="mr1.test"/> + <ivy:cachepath pathid="mr1.test.classpath" conf="mr1.test" type="jar,test-jar"/> + </target> + + <if> + <equals arg1="${mr1.test}" arg2="mr1"/> + <then> + <path id="pigudf.classpath"> + <pathelement location="../../../build/classes"/> + <pathelement location="${build.classes}"/> + <pathelement location="${pigjar-withouthadoop}"/> + <pathelement location="${pigtest}"/> + <fileset dir="../../../build/ivy/lib"> + <include name="**/*.jar"/> + <exclude name="hadoop-mapreduce*.jar" /> + <exclude name="hadoop-yarn*.jar" /> + </fileset> + <path refid="mr1.test.classpath"/> + </path> + </then> + <else> + <path id="pigudf.classpath"> + <pathelement location="../../../build/classes"/> + <pathelement location="${build.classes}"/> + <pathelement location="${pigjar-withouthadoop}"/> + <pathelement location="${pigtest}"/> + <fileset dir="../../../build/ivy/lib"> + <include name="**/*.jar"/> + </fileset> + </path> + </else> + 
</if> <path id="test.classpath"> <pathelement location="${udfjar}"/> @@ -87,7 +191,7 @@ <target name="clean"> <delete dir="build"/> </target> - <target depends="init" name="compile" description="compile all of the class files"> + <target depends="init, ivy-mr1-test" name="compile" description="compile all of the class files"> <echo> *** Compiling Pig UDFs ***</echo> <javac srcdir="${src.dir}" debug="${javac.debug}" debuglevel="${javac.level}" destdir="${build.classes}" source="${javac.version}" target="${javac.version}" optimize="${javac.optimize}" deprecation="${javac.deprecation}" excludes="${build.classes.excludes}"> diff --git contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java index 5a87932..bf3de11 100644 --- contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java +++ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java @@ -108,7 +108,7 @@ public class PigAvroDatumWriter extends GenericDatumWriter<Object> { */ protected void writeUnion(Schema schema, Object datum, Encoder out) throws IOException { - int index = resolveUnion(schema, datum); + int index = resolveUnionSchema(schema, datum); out.writeIndex(index); write(schema.getTypes().get(index), datum, out); } @@ -116,7 +116,7 @@ public class PigAvroDatumWriter extends GenericDatumWriter<Object> { /** * Called to resolve union. 
*/ - protected int resolveUnion(Schema union, Object datum) throws IOException { + protected int resolveUnionSchema(Schema union, Object datum) throws IOException { int i = 0; for (Schema type : union.getTypes()) { if (type.getType().equals(Schema.Type.UNION)) @@ -130,7 +130,7 @@ public class PigAvroDatumWriter extends GenericDatumWriter<Object> { /** * Recursively check whether "datum" is an instance of "schema" and called - * by {@link #resolveUnion(Schema,Object)}, + * by {@link #resolveUnionSchema(Schema,Object)}, * {@link #unwrappedInstanceOf(Schema,Object)}. * */ @@ -156,7 +156,7 @@ public class PigAvroDatumWriter extends GenericDatumWriter<Object> { case UNION: @SuppressWarnings("unused") - int index = resolveUnion(schema, datum); + int index = resolveUnionSchema(schema, datum); return true; case ENUM: return datum instanceof String && schema.hasEnumSymbol(((String) datum)) diff --git ivy.xml ivy.xml index 5538a8c..2e5c1cb 100644 --- ivy.xml +++ ivy.xml @@ -40,6 +40,7 @@ <conf name="buildJar" extends="compile,test" visibility="private"/> <conf name="hadoop20" visibility="private"/> <conf name="hadoop23" visibility="private"/> + <conf name="mr1.test" visibility="private"/> </configurations> <publications> <!--get the artifact from our module name--> @@ -107,7 +108,7 @@ <dependency org="com.sun.jersey" name="jersey-core" rev="${jersey-core.version}" conf="hadoop20->default"/> <dependency org="org.apache.hadoop" name="hadoop-core" rev="${hadoop-core.version}" - conf="hadoop20->default"> + conf="hadoop20->default; mr1.test->default"> <exclude org="org.slf4j" module="slf4j"/> <exclude org="org.slf4j" module="slf4j-api"/> <exclude org="org.slf4j" module="log4j12"/> @@ -118,7 +119,7 @@ </dependency> <dependency org="org.apache.hadoop" name="hadoop-test" rev="${hadoop-test.version}" - conf="hadoop20->default"> + conf="hadoop20->default; mr1.test->default"> <exclude org="org.slf4j" module="slf4j"/> <exclude org="org.slf4j" module="slf4j-api"/> <exclude 
org="org.slf4j" module="log4j12"/>
