Hi, I am getting the following error:

---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
<ipython-input-2-a208cdba2c46> in <module>()
      3 TOTAL = 1000000
      4 dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in range(TOTAL)]).cache()
----> 5 print("Number of random points:", dots.count())
      6
      7 stats = dots.stats()

C:\opt\spark\python\pyspark\rdd.py in count(self)
   1039         3
   1040         """
-> 1041         return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
   1042
   1043     def stats(self):

C:\opt\spark\python\pyspark\rdd.py in sum(self)
   1030         6.0
   1031         """
-> 1032         return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
   1033
   1034     def count(self):

C:\opt\spark\python\pyspark\rdd.py in fold(self, zeroValue, op)
    904         # zeroValue provided to each partition is unique from the one provided
    905         # to the final reduce call
--> 906         vals = self.mapPartitions(func).collect()
    907         return reduce(op, vals, zeroValue)
    908

C:\opt\spark\python\pyspark\rdd.py in collect(self)
    807         """
    808         with SCCallSiteSync(self.context) as css:
--> 809             port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    810         return list(_load_from_socket(port, self._jrdd_deserializer))
    811

C:\opt\spark\python\pyspark\rdd.py in _jrdd(self)
   2453
   2454         wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
-> 2455                                       self._jrdd_deserializer, profiler)
   2456         python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,
   2457                                              self.preservesPartitioning)

C:\opt\spark\python\pyspark\rdd.py in _wrap_function(sc, func, deserializer, serializer, profiler)
   2388     pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
   2389     return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
-> 2390                                   sc.pythonVer, broadcast_vars, sc._javaAccumulator)
   2391
   2392

C:\ProgramData\Anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1426         answer = self._gateway_client.send_command(command)
   1427         return_value = get_return_value(
-> 1428             answer, self._gateway_client, None, self._fqn)
   1429
   1430         for temp_arg in temp_args:

C:\opt\spark\python\pyspark\sql\utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

C:\ProgramData\Anaconda3\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    322                 raise Py4JError(
    323                     "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
--> 324                     format(target_id, ".", name, value))
    325             else:
    326                 raise Py4JError(

Py4JError: An error occurred while calling None.org.apache.spark.api.python.PythonFunction. Trace:
py4j.Py4JException: Constructor org.apache.spark.api.python.PythonFunction([class [B, class java.util.HashMap, class java.util.ArrayList, class java.lang.String, class java.lang.String, class java.util.ArrayList, class org.apache.spark.api.python.PythonAccumulatorV2]) does not exist
    at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:179)
    at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:196)
    at py4j.Gateway.invoke(Gateway.java:235)
    at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
    at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:745)
I installed Spark 2.0.2 on Windows 10, and my code is as below:

> from pyspark import SparkContext
> import numpy as np
>
> sc = SparkContext.getOrCreate()
> sc
>
> TOTAL = 1000000
> dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in range(TOTAL)]).cache()
> print("Number of random points:", dots.count())
>
> stats = dots.stats()
> print('Mean:', stats.mean())
> print('stdev:', stats.stdev())

I get this error when I run the NumPy code. Please tell me what is wrong here.
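For what it is worth, my guess from the "Constructor ... does not exist" message is that the Python-side pyspark/py4j code and the Spark JVM may not be from the same version (the traceback mixes C:\opt\spark\python\pyspark with py4j loaded from the Anaconda site-packages). That is only an assumption on my part; the short check below just prints which versions and module paths the driver is actually using, relying only on standard attributes (pyspark.__version__, sc.version, py4j.version.__version__):

> import sys
> import pyspark
> from pyspark import SparkContext
> from py4j.version import __version__ as py4j_version
>
> sc = SparkContext.getOrCreate()
>
> # Which Python interpreter the driver runs under
> print("Python executable:", sys.executable)
> # Where the pyspark package is imported from, and its version
> print("pyspark path:", pyspark.__file__)
> print("pyspark version:", pyspark.__version__)
> # Version reported by the Spark JVM through the py4j gateway
> print("Spark (JVM) version:", sc.version)
> # py4j version used on the Python side
> print("py4j version:", py4j_version)

If pyspark.__version__ and sc.version do not match, or pyspark/py4j are being picked up from somewhere other than the C:\opt\spark install, I assume that could be related to the missing constructor, but I am not sure.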