Hi all,
I am really stuck here. I know this has been asked before, but it just won't
resolve for me. I am using the Anaconda 3.5 distribution, and I have built
spark-1.6.2 twice: the first time with Hive and JDBC support through this
command
*mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-thriftserver
-DskipTests clean package* — it gives a Hive exception,
and the second time through this command
*./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4
-Phive -Phive-thriftserver -Pyarn* — it also gives me an exception.
I have also tried the Spark pre-built version spark-1.6.1-bin-hadoop2.6, but
the exception remains the same.
The things I've tried to solve this:
1) Placed hive-site.xml in the spark\conf folder (it was not there before).
2) Set SPARK_HIVE = true.
3) Ran sbt assembly.
But the problem is still there.
Here is the full error:
You must build Spark with Hive. Export 'SPARK_HIVE=true' and run build/sbt
assembly
---
Py4JJavaError Traceback (most recent call last)
in ()
3
4 binary_map = {'Yes':1.0, 'No':0.0, 'True':1.0, 'False':0.0}
> 5 toNum = UserDefinedFunction(lambda k: binary_map[k], DoubleType())
6
7 CV_data = CV_data.drop('State').drop('Area code') .drop('Total
day charge').drop('Total eve charge') .drop('Total night
charge').drop('Total intl charge') .withColumn('Churn',
toNum(CV_data['Churn'])) .withColumn('International plan',
toNum(CV_data['International plan'])) .withColumn('Voice mail plan',
toNum(CV_data['Voice mail plan'])).cache()
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\functions.py
in __init__(self, func, returnType, name)
1556 self.returnType = returnType
1557 self._broadcast = None
-> 1558 self._judf = self._create_judf(name)
1559
1560 def _create_judf(self, name):
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\functions.py
in _create_judf(self, name)
1567 pickled_command, broadcast_vars, env, includes =
_prepare_for_python_RDD(sc, command, self)
1568 ctx = SQLContext.getOrCreate(sc)
-> 1569 jdt = ctx._ssql_ctx.parseDataType(self.returnType.json())
1570 if name is None:
1571 name = f.__name__ if hasattr(f, '__name__') else
f.__class__.__name__
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\context.py
in _ssql_ctx(self)
681 try:
682 if not hasattr(self, '_scala_HiveContext'):
--> 683 self._scala_HiveContext = self._get_hive_ctx()
684 return self._scala_HiveContext
685 except Py4JError as e:
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\context.py
in _get_hive_ctx(self)
690
691 def _get_hive_ctx(self):
--> 692 return self._jvm.HiveContext(self._jsc.sc())
693
694 def refreshTable(self, tableName):
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\lib\py4j-0.9-src.zip\py4j\java_gateway.py
in __call__(self, *args)
1062 answer = self._gateway_client.send_command(command)
1063 return_value = get_return_value(
-> 1064 answer, self._gateway_client, None, self._fqn)
1065
1066 for temp_arg in temp_args:
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\utils.py in
deco(*a, **kw)
43 def deco(*a, **kw):
44 try:
---> 45 return f(*a, **kw)
46 except py4j.protocol.Py4JJavaError as e:
47 s = e.java_exception.toString()
C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\lib\py4j-0.9-src.zip\py4j\protocol.py
in get_return_value(answer, gateway_client, target_id, name)
306 raise Py4JJavaError(
307 "An error occurred while calling {0}{1}{2}.\n".
--> 308 format(target_id, ".", name), value)
309 else:
310 raise Py4JError(
Py4JJavaError: An error occurred while calling
None.org.apache.spark.sql.hive.HiveContext.
: java.lang.RuntimeException: java.lang.NullPointerException
at
org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
at
org.apache.spark.sql.hive.client.ClientWrapper.(ClientWrapper.scala:204