Hi Nebojsa,

You're absolutely right. CDH4.x compiles everything against hadoop-2.0.x,
so HadoopJobHistoryLoader is excluded. Thank you very much for pointing
that out.

This is a packaging bug as I see it, and I am going to get it fixed in the
next release. In the meantime, could you apply the patch that I added at the
end and build piggybank.jar from the source tarball yourself?

1) wget http://archive.cloudera.com/cdh4/cdh/4/pig-0.9.2-cdh4.0.1.tar.gz
2) tar -xf pig-0.9.2-cdh4.0.1.tar.gz
3) cd pig-0.9.2-cdh4.0.1
4) patch -p0 -i <this patch>
5) ant clean compile-test jar-withouthadoop -Dhadoopversion=23
6) cd contrib/piggybank/java
7) ant clean jar -Dhadoopversion=20 -Dmr1.test=mr1

Now you will find piggybank.jar built in the current directory. You can
verify that it contains HadoopJobHistoryLoader as follows:

8) jar -tvf piggybank.jar | grep HadoopJobHistoryLoader
  1866 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$1.class
  1885 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryInputFormat.class
  5769 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryReader.class
   943 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobHistoryPathFilter.class
  3460 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobKeys.class
  2681 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobXMLHandler.class
   751 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$MRJobInfo.class
 16364 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader.class

You can also run the unit test as follows:

9) ant clean test -Dhadoopversion=20 -Dmr1.test=mr1 -Dtestcase=TestHadoopJobHistoryLoader

Please let me know if this works for you.

Thanks!
Cheolsoo



diff --git contrib/piggybank/java/build.xml contrib/piggybank/java/build.xml
index b162dbd..1616e38 100755
--- contrib/piggybank/java/build.xml
+++ contrib/piggybank/java/build.xml
@@ -15,7 +15,15 @@
    limitations under the License.
 -->

-<project basedir="." default="jar" name="pigudf">
+<project basedir="." default="jar" name="pigudf"
+         xmlns:artifact="urn:maven-artifact-ant"
+         xmlns:ivy="antlib:org.apache.ivy.ant">
+    <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+        <classpath>
+            <pathelement
location="../../../cloudera/maven-packaging/lib/ant-contrib-1.0b3.jar"/>
+        </classpath>
+    </taskdef>
+
     <!-- javac properties -->
     <property name="javac.debug" value="on" />
     <property name="javac.level" value="source,lines,vars"/>
@@ -39,6 +47,17 @@
     <property name="hsqldb.jar"
value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/>
     <property name="ivy.lib.dir" value="../../../build/ivy/lib/Pig"/>

+    <property name="src.shims.dir"
value="../../../shims/src/hadoop${hadoopversion}" />
+    <if>
+        <equals arg1="${mr1.test}" arg2="mr1"/>
+        <then>
+            <property name="src.shims.test.dir"
value="../../../shims/test/hadoop20" />
+        </then>
+        <else>
+            <property name="src.shims.test.dir"
value="../../../shims/test/hadoop${hadoopversion}" />
+        </else>
+    </if>
+
  <!-- JobHistoryLoader currently does not support 0.23 -->
     <condition property="build.classes.excludes"
value="**/HadoopJobHistoryLoader.java" else="">
         <equals arg1="${hadoopversion}" arg2="23"/>
@@ -59,14 +78,99 @@
     <property name="test.src.dir" value="src/test/java" />
     <property name="junit.hadoop.conf" value="${user.home}/pigtest/conf/"/>

-    <path id="pigudf.classpath">
-        <pathelement location="${build.classes}"/>
-        <pathelement location="${pigjar-withouthadoop}"/>
-        <pathelement location="${pigtest}"/>
-        <fileset dir="../../../build/ivy/lib">
-            <include name="**/*.jar"/>
-        </fileset>
-    </path>
+    <property name="ivy.dir" location="../../../ivy" />
+    <property name="build.ivy.dir" location="${build.dir}/ivy" />
+    <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+    <property name="ivy.lib.dir" location="${build.ivy.lib.dir}/${
ant.project.name}"/>
+    <property name="build.ivy.report.dir"
location="${build.ivy.dir}/report" />
+    <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven"
/>
+    <property name="build.ivy.maven.pom"
location="${build.ivy.maven.dir}/pig-${version}.pom" />
+    <property name="build.ivy.maven.jar"
location="${build.ivy.maven.dir}/pig-${version}-core.jar" />
+
+    <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+    <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"
/>
+    <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
+    <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+    <property name="ivy_repo_url"
value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+
+    <target name="ivy-init-dirs">
+        <mkdir dir="${build.ivy.dir}" />
+        <mkdir dir="${build.ivy.lib.dir}" />
+        <mkdir dir="${build.ivy.report.dir}" />
+        <mkdir dir="${build.ivy.maven.dir}" />
+        <copy todir="${basedir}/" file="../../../ivy.xml" />
+    </target>
+
+    <target name="ivy-probe-antlib" >
+        <condition property="ivy.found">
+         <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </condition>
+    </target>
+
+    <target name="ivy-download" description="To download ivy"
unless="offline">
+        <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+    </target>
+
+    <!--
+    To avoid Ivy leaking things across big projects, always load Ivy in
the same classloader.
+    Also note how we skip loading Ivy if it is already there, just to make
sure all is well.
+    -->
+    <target name="ivy-init-antlib"
depends="ivy-download,ivy-init-dirs,ivy-probe-antlib" unless="ivy.found">
+       <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
loaderRef="ivyLoader">
+         <classpath>
+            <pathelement location="${ivy.jar}"/>
+         </classpath>
+       </typedef>
+       <fail>
+         <condition >
+           <not>
+             <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+           </not>
+         </condition>
+         You need Apache Ivy 2.0 or later from http://ant.apache.org/
+         It could not be loaded from ${ivy_repo_url}
+       </fail>
+    </target>
+
+    <target name="ivy-init" depends="ivy-init-antlib" >
+       <ivy:configure settingsid="${ant.project.name}.ivy.settings"
file="${ivysettings.xml}" override='false'/>
+    </target>
+
+    <target name="ivy-mr1-test" depends="ivy-init" description="Resolve,
Retrieve Ivy-managed artifacts for test configuration" if="mr1.test">
+       <ivy:resolve settingsRef="${ant.project.name}.ivy.settings"
conf="test"/>
+       <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+
pattern="${build.ivy.lib.dir}/mr1/${ivy.artifact.retrieve.pattern}"
conf="mr1.test"/>
+       <ivy:cachepath pathid="mr1.test.classpath" conf="mr1.test"
type="jar,test-jar"/>
+    </target>
+
+    <if>
+      <equals arg1="${mr1.test}" arg2="mr1"/>
+      <then>
+        <path id="pigudf.classpath">
+            <pathelement location="../../../build/classes"/>
+            <pathelement location="${build.classes}"/>
+            <pathelement location="${pigjar-withouthadoop}"/>
+            <pathelement location="${pigtest}"/>
+            <fileset dir="../../../build/ivy/lib">
+                <include name="**/*.jar"/>
+                <exclude name="hadoop-mapreduce*.jar" />
+                <exclude name="hadoop-yarn*.jar" />
+            </fileset>
+            <path refid="mr1.test.classpath"/>
+        </path>
+      </then>
+      <else>
+        <path id="pigudf.classpath">
+            <pathelement location="../../../build/classes"/>
+            <pathelement location="${build.classes}"/>
+            <pathelement location="${pigjar-withouthadoop}"/>
+            <pathelement location="${pigtest}"/>
+            <fileset dir="../../../build/ivy/lib">
+                <include name="**/*.jar"/>
+            </fileset>
+        </path>
+      </else>
+    </if>

     <path id="test.classpath">
         <pathelement location="${udfjar}"/>
@@ -87,7 +191,7 @@
     <target name="clean">
         <delete dir="build"/>
     </target>
-    <target depends="init" name="compile" description="compile all of the
class files">
+    <target depends="init, ivy-mr1-test" name="compile"
description="compile all of the class files">
         <echo> *** Compiling Pig UDFs ***</echo>
         <javac srcdir="${src.dir}" debug="${javac.debug}"
debuglevel="${javac.level}" destdir="${build.classes}"
source="${javac.version}"
         target="${javac.version}" optimize="${javac.optimize}"
deprecation="${javac.deprecation}" excludes="${build.classes.excludes}">
diff --git
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java
index 5a87932..bf3de11 100644
---
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java
+++
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroDatumWriter.java
@@ -108,7 +108,7 @@ public class PigAvroDatumWriter extends
GenericDatumWriter<Object> {
      */
     protected void writeUnion(Schema schema, Object datum, Encoder out)
                                     throws IOException {
-        int index = resolveUnion(schema, datum);
+        int index = resolveUnionSchema(schema, datum);
         out.writeIndex(index);
         write(schema.getTypes().get(index), datum, out);
     }
@@ -116,7 +116,7 @@ public class PigAvroDatumWriter extends
GenericDatumWriter<Object> {
     /**
      * Called to resolve union.
      */
-    protected int resolveUnion(Schema union, Object datum) throws
IOException {
+    protected int resolveUnionSchema(Schema union, Object datum) throws
IOException {
         int i = 0;
         for (Schema type : union.getTypes()) {
             if (type.getType().equals(Schema.Type.UNION))
@@ -130,7 +130,7 @@ public class PigAvroDatumWriter extends
GenericDatumWriter<Object> {

     /**
      * Recursively check whether "datum" is an instance of "schema" and
called
-     * by {@link #resolveUnion(Schema,Object)},
+     * by {@link #resolveUnionSchema(Schema,Object)},
      * {@link #unwrappedInstanceOf(Schema,Object)}.
      *
      */
@@ -156,7 +156,7 @@ public class PigAvroDatumWriter extends
GenericDatumWriter<Object> {

             case UNION:
                 @SuppressWarnings("unused")
-                int index = resolveUnion(schema, datum);
+                int index = resolveUnionSchema(schema, datum);
                 return true;
             case ENUM:
                 return datum instanceof String &&
schema.hasEnumSymbol(((String) datum))
diff --git ivy.xml ivy.xml
index 5538a8c..2e5c1cb 100644
--- ivy.xml
+++ ivy.xml
@@ -40,6 +40,7 @@
     <conf name="buildJar" extends="compile,test" visibility="private"/>
     <conf name="hadoop20" visibility="private"/>
     <conf name="hadoop23" visibility="private"/>
+    <conf name="mr1.test" visibility="private"/>
   </configurations>
   <publications>
     <!--get the artifact from our module name-->
@@ -107,7 +108,7 @@
     <dependency org="com.sun.jersey" name="jersey-core"
rev="${jersey-core.version}"
       conf="hadoop20->default"/>
     <dependency org="org.apache.hadoop" name="hadoop-core"
rev="${hadoop-core.version}"
-      conf="hadoop20->default">
+      conf="hadoop20->default; mr1.test->default">
       <exclude org="org.slf4j" module="slf4j"/>
       <exclude org="org.slf4j" module="slf4j-api"/>
       <exclude org="org.slf4j" module="log4j12"/>
@@ -118,7 +119,7 @@
     </dependency>

     <dependency org="org.apache.hadoop" name="hadoop-test"
rev="${hadoop-test.version}"
-      conf="hadoop20->default">
+      conf="hadoop20->default; mr1.test->default">
       <exclude org="org.slf4j" module="slf4j"/>
       <exclude org="org.slf4j" module="slf4j-api"/>
       <exclude org="org.slf4j" module="log4j12"/>

Reply via email to