Hello, I know this question has already been asked, but no one answered it — that is why I am asking again. I am using the Anaconda 3.5 distribution and Spark 1.6.2. I have been following this blog: <http://nbviewer.jupyter.org/github/bensadeghi/pyspark-churn-prediction/blob/master/churn-prediction.ipynb> . It was running fine until I reached the 7th cell, shown here: from pyspark.sql.types import DoubleType from pyspark.sql.functions import UserDefinedFunction
binary_map = {'Yes':1.0, 'No':0.0, 'True':1.0, 'False':0.0} toNum = UserDefinedFunction(lambda k: binary_map[k], DoubleType()) CV_data = CV_data.drop('State').drop('Area code') \ .drop('Total day charge').drop('Total eve charge') \ .drop('Total night charge').drop('Total intl charge') \ .withColumn('Churn', toNum(CV_data['Churn'])) \ .withColumn('International plan', toNum(CV_data['International plan'])) \ .withColumn('Voice mail plan', toNum(CV_data['Voice mail plan'])).cache() final_test_data = final_test_data.drop('State').drop('Area code') \ .drop('Total day charge').drop('Total eve charge') \ .drop('Total night charge').drop('Total intl charge') \ .withColumn('Churn', toNum(final_test_data['Churn'])) \ .withColumn('International plan', toNum(final_test_data['International plan'])) \ .withColumn('Voice mail plan', toNum(final_test_data['Voice mail plan'])).cache() here i am getting exception You must build Spark with Hive. Export 'SPARK_HIVE=true' and run build/sbt assembly --------------------------------------------------------------------------- Py4JJavaError Traceback (most recent call last) <ipython-input-7-6db2287430d4> in <module>() 3 4 binary_map = {'Yes':1.0, 'No':0.0, 'True':1.0, 'False':0.0} ----> 5 toNum = UserDefinedFunction(lambda k: binary_map[k], DoubleType()) 6 7 CV_data = CV_data.drop('State').drop('Area code') .drop('Total day charge').drop('Total eve charge') .drop('Total night charge').drop('Total intl charge') .withColumn('Churn', toNum(CV_data['Churn'])) .withColumn('International plan', toNum(CV_data['International plan'])) .withColumn('Voice mail plan', toNum(CV_data['Voice mail plan'])).cache() C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\functions.py in __init__(self, func, returnType, name) 1556 self.returnType = returnType 1557 self._broadcast = None -> 1558 self._judf = self._create_judf(name) 1559 1560 def _create_judf(self, name): C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\functions.py in 
_create_judf(self, name) 1567 pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command, self) 1568 ctx = SQLContext.getOrCreate(sc) -> 1569 jdt = ctx._ssql_ctx.parseDataType(self.returnType.json()) 1570 if name is None: 1571 name = f.__name__ if hasattr(f, '__name__') else f.__class__.__name__ C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\context.py in _ssql_ctx(self) 681 try: 682 if not hasattr(self, '_scala_HiveContext'): --> 683 self._scala_HiveContext = self._get_hive_ctx() 684 return self._scala_HiveContext 685 except Py4JError as e: C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\context.py in _get_hive_ctx(self) 690 691 def _get_hive_ctx(self): --> 692 return self._jvm.HiveContext(self._jsc.sc()) 693 694 def refreshTable(self, tableName): C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\lib\py4j-0.9-src.zip\py4j\java_gateway.py in __call__(self, *args) 1062 answer = self._gateway_client.send_command(command) 1063 return_value = get_return_value( -> 1064 answer, self._gateway_client, None, self._fqn) 1065 1066 for temp_arg in temp_args: C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\pyspark\sql\utils.py in deco(*a, **kw) 43 def deco(*a, **kw): 44 try: ---> 45 return f(*a, **kw) 46 except py4j.protocol.Py4JJavaError as e: 47 s = e.java_exception.toString() C:\Users\InAm-Ur-Rehman\Sparkkk\spark-1.6.2\python\lib\py4j-0.9-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name) 306 raise Py4JJavaError( 307 "An error occurred while calling {0}{1}{2}.\n". --> 308 format(target_id, ".", name), value) 309 else: 310 raise Py4JError( Py4JJavaError: An error occurred while calling None.org.apache.spark.sql.hive.HiveContext. 
: java.lang.RuntimeException: java.lang.NullPointerException at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522) at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:204) at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:238) at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:218) at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:208) at org.apache.spark.sql.hive.HiveContext.functionRegistry$lzycompute(HiveContext.scala:462) at org.apache.spark.sql.hive.HiveContext.functionRegistry(HiveContext.scala:461) at org.apache.spark.sql.UDFRegistration.<init>(UDFRegistration.scala:40) at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:330) at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:90) at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:422) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381) at py4j.Gateway.invoke(Gateway.java:214) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) at py4j.GatewayConnection.run(GatewayConnection.java:209) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.NullPointerException at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012) at org.apache.hadoop.util.Shell.runCommand(Shell.java:445) at org.apache.hadoop.util.Shell.run(Shell.java:418) at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:650) 
at org.apache.hadoop.util.Shell.execCommand(Shell.java:739) at org.apache.hadoop.util.Shell.execCommand(Shell.java:722) at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097) at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:559) at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:534) at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599) at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554) at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508) ... 21 more -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/how-to-resolve-you-must-build-spark-with-hive-exception-tp27390.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe e-mail: user-unsubscr...@spark.apache.org