I am using spark-1.3 with hadoop-provided and hive-provided and hive-0.13.1
profiles. I am running a simple spark job on a yarn cluster by adding all
hadoop2 and hive13 jars to the spark classpaths.

If I remove the hive-provided while building spark, I dont face any issue.
But with hive-provided I am getting a "java.lang.NoClassDefFoundError:
org/apache/hadoop/hive/conf/HiveConf" in the yarn executor.

Code is below:
import org.apache.spark._
import org.apache.spark.sql._
import org.apache.hadoop.hive.conf.HiveConf

object Simple {
  def main(args: Array[String]) = {
    val sc = new SparkContext(new SparkConf())
    val sqlC = new  org.apache.spark.sql.hive.HiveContext(sc)

    val x = sc.parallelize(1 to 2).map(x =>
      { val h = new HiveConf; h.getBoolean("hive.test", false) })
    x.collect.foreach(x => println(s"-----------------  $x
----------------"))

    val result = sqlC.sql("""
      select * from products_avro order by month, name, price
      """)
    result.collect.foreach(println)
  }
}

The first job (involving map) runs fine. HiveConf is instantiated and the
conf variable is looked up etc. But the second job (involving the select *
query) throws the class not found exception.

The task deserializer is the one throwing the exception. It is unable to
find the class in its classpath. Not sure what is different from the first
job which also involved HiveConf.

157573 [task-result-getter-3] 2015/04/20 11:01:48:287 WARN TaskSetManager:
Lost task 0.2 in stage 2.0 (TID 4, localhost):
java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf
    at java.lang.Class.getDeclaredFields0(Native Method)
    at java.lang.Class.privateGetDeclaredFields(Class.java:2436)
    at java.lang.Class.getDeclaredField(Class.java:1946)
    at
java.io.ObjectStreamClass.getDeclaredSUID(ObjectStreamClass.java:1659)
    at java.io.ObjectStreamClass.access$700(ObjectStreamClass.java:72)
    at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:480)
    at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:468)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.io.ObjectStreamClass.<init>(ObjectStreamClass.java:468)
    at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:365)
    at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:602)
    at
java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1622)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1517)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1771)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
    at scala.collection.immutable.$colon$colon.readObject(List.scala:362)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at
java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1017)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1893)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
    at
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
    at
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:68)
    at
org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:94)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:57)
    at org.apache.spark.scheduler.Task.run(Task.scala:64)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
    at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException:
org.apache.hadoop.hive.conf.HiveConf
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    ... 70 more

-- Thanks

Reply via email to