Hi,
I am getting this error:

---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
<ipython-input-2-a208cdba2c46> in <module>()
      3 TOTAL = 1000000
      4 dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in range(TOTAL)]).cache()
----> 5 print("Number of random points:", dots.count())
      6
      7 stats = dots.stats()

C:\opt\spark\python\pyspark\rdd.py in count(self)
   1039         3
   1040         """
-> 1041         return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
   1042
   1043     def stats(self):

C:\opt\spark\python\pyspark\rdd.py in sum(self)
   1030         6.0
   1031         """
-> 1032         return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
   1033
   1034     def count(self):

C:\opt\spark\python\pyspark\rdd.py in fold(self, zeroValue, op)
    904         # zeroValue provided to each partition is unique from the one provided
    905         # to the final reduce call
--> 906         vals = self.mapPartitions(func).collect()
    907         return reduce(op, vals, zeroValue)
    908

C:\opt\spark\python\pyspark\rdd.py in collect(self)
    807         """
    808         with SCCallSiteSync(self.context) as css:
--> 809             port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    810         return list(_load_from_socket(port, self._jrdd_deserializer))
    811

C:\opt\spark\python\pyspark\rdd.py in _jrdd(self)
   2453
   2454         wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
-> 2455                                       self._jrdd_deserializer, profiler)
   2456         python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,
   2457                                              self.preservesPartitioning)

C:\opt\spark\python\pyspark\rdd.py in _wrap_function(sc, func, deserializer, serializer, profiler)
   2388     pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
   2389     return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
-> 2390                                   sc.pythonVer, broadcast_vars, sc._javaAccumulator)
   2391
   2392

C:\ProgramData\Anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1426         answer = self._gateway_client.send_command(command)
   1427         return_value = get_return_value(
-> 1428             answer, self._gateway_client, None, self._fqn)
   1429
   1430         for temp_arg in temp_args:

C:\opt\spark\python\pyspark\sql\utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

C:\ProgramData\Anaconda3\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    322             raise Py4JError(
    323                 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
--> 324                 format(target_id, ".", name, value))
    325         else:
    326             raise Py4JError(

Py4JError: An error occurred while calling None.org.apache.spark.api.python.PythonFunction. Trace:
py4j.Py4JException: Constructor org.apache.spark.api.python.PythonFunction([class [B, class java.util.HashMap, class java.util.ArrayList, class java.lang.String, class java.lang.String, class java.util.ArrayList, class org.apache.spark.api.python.PythonAccumulatorV2]) does not exist
        at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:179)
        at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:196)
        at py4j.Gateway.invoke(Gateway.java:235)
        at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
        at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
        at py4j.GatewayConnection.run(GatewayConnection.java:214)
        at java.lang.Thread.run(Thread.java:745)


I installed Spark 2.0.2 on Windows 10, and my code is as below:

> from pyspark import SparkContext
> import numpy as np
>
> sc = SparkContext.getOrCreate()
> sc
>
> TOTAL = 1000000
> dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in range(TOTAL)]).cache()
> print("Number of random points:", dots.count())
>
> stats = dots.stats()
> print('Mean:', stats.mean())
> print('stdev:', stats.stdev())
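
Since this may be related to how Spark is set up on my machine, here is a quick environment check I can run in the same notebook (a minimal sketch; which variables matter here is my assumption):

> import os
>
> # Print where Spark, Python, and Java are being picked up from
> for var in ("SPARK_HOME", "PYTHONPATH", "JAVA_HOME"):
>     print(var, "=", os.environ.get(var))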


I get this error whenever I run the NumPy code.
Please tell me what is wrong with this.
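
In case a mismatch between the pyspark Python files and the Spark jars matters, this is how I can print the versions on both sides (a sketch; it assumes the same notebook session as above):

> import sys
> import pyspark
> from pyspark import SparkContext
>
> # Python-side package location and interpreter version
> print("pyspark module:", pyspark.__file__)
> print("python:", sys.version)
>
> # Version reported by the running Spark context (JVM side)
> sc = SparkContext.getOrCreate()
> print("spark:", sc.version)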
