I've written a Jython UDF that works fine in local mode but fails
when I run it on my cluster.
I'm running Pig 0.8.0; the stack trace and my environment variables are below.
java.io.IOException: Deserialization error: could not instantiate 'org.apache.pig.scripting.jython.JythonFunction' with arguments '[src/apachelogs.py, extract_apache_log]'
at org.apache.pig.impl.util.ObjectSerializer.deserialize(ObjectSerializer.java:55)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.setup(PigMapBase.java:151)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:142)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:646)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:322)
at org.apache.hadoop.mapred.Child$4.run(Child.java:240)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1115)
export CLASSPATH=/usr/share/java/*:/home/mroddy/jython/jython.jar
export HADOOP_HOME=/usr/lib/hadoop
export PIG_CLASSPATH=/usr/lib/pig/contrib/piggybank/java/piggybank.jar:/usr/share/java/joda-time.jar:/home/mroddy/jython/jython.jar