Re: what does this error mean?

2017-05-13 Thread Zeming Yu
Another error. Does anyone have any idea?

This one happens when I try to convert a Spark DataFrame to pandas:

---Py4JError
Traceback (most recent call
last)/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py
in collect(self)390 with SCCallSiteSync(self._sc) as
css:--> 391 port = self._jdf.collectToPython()392
   return list(_load_from_socket(port,
BatchedSerializer(PickleSerializer(
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in __call__(self, *args)   1132 return_value =
get_return_value(-> 1133 answer, self.gateway_client,
self.target_id, self.name)   1134
/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/utils.py in
deco(*a, **kw) 62 try:---> 63 return f(*a,
**kw) 64 except py4j.protocol.Py4JJavaError as e:
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/protocol.py in
get_return_value(answer, gateway_client, target_id, name)326
  "An error occurred while calling {0}{1}{2}".--> 327
   format(target_id, ".", name))328 else:
Py4JError: An error occurred while calling o69.collectToPython

During handling of the above exception, another exception occurred:
IndexErrorTraceback (most recent call
last)/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in _get_connection(self)826 try:--> 827
connection = self.deque.pop()828 except IndexError:
IndexError: pop from an empty deque

During handling of the above exception, another exception occurred:
ConnectionRefusedErrorTraceback (most recent call
last)/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in start(self)962 try:--> 963
self.socket.connect((self.address, self.port))964
self.is_connected = True
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:
Py4JNetworkError  Traceback (most recent call
last) in ()  7
   'lead_time', 'dep_weekday', 'dep_weeknum',  8
   'days_to_last_holiday', 'days_to_next_holiday',> 9
'duration_minutes', 'stop_minutes').toPandas()
10 flight_pd.head()
/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py
in toPandas(self)   1583 """   1584 import pandas as
pd-> 1585 return pd.DataFrame.from_records(self.collect(),
columns=self.columns)   15861587
##
/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py
in collect(self)389 """390 with
SCCallSiteSync(self._sc) as css:--> 391 port =
self._jdf.collectToPython()392 return
list(_load_from_socket(port, BatchedSerializer(PickleSerializer(
 393
/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/traceback_utils.py
in __exit__(self, type, value, tb) 76
SCCallSiteSync._spark_stack_depth -= 1 77 if
SCCallSiteSync._spark_stack_depth == 0:---> 78
self._context._jsc.setCallSite(None)
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in __call__(self, *args)   1129 proto.END_COMMAND_PART
1130 -> 1131 answer =
self.gateway_client.send_command(command)   1132 return_value
= get_return_value(   1133 answer, self.gateway_client,
self.target_id, self.name)
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in send_command(self, command, retry, binary)879  if
`binary` is `True`.880 """--> 881 connection =
self._get_connection()882 try:883 response
= connection.send_command(command)
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in _get_connection(self)827 connection =
self.deque.pop()828 except IndexError:--> 829
connection = self._create_connection()830 return
connection831
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in _create_connection(self)833 connection =
GatewayConnection(834 self.gateway_parameters,
self.gateway_property)--> 835 connection.start()836
 return connection837
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in start(self)968 "server
({0}:{1})".format(self.address, self.port)969
logger.exception(msg)--> 970 raise Py4JNetworkError(msg,
e)971 972 def close(self, reset=False):
Py4JNetworkError: An error occurred while trying to connect to the
Java server (127.0.0.1:34166)


On Sat, May 13, 2017 at 10:21 PM, Zeming Yu  wrote:

> My code runs error 

what does this error mean?

2017-05-13 Thread Zeming Yu
My code runs error-free on my local PC. I just tried running the same code on
an Ubuntu machine on EC2 and got the error below. Any idea where to start
in terms of debugging?

---Py4JError
Traceback (most recent call
last) in ()> 1
output.show(2)
/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/dataframe.py
in show(self, n, truncate)316 """317 if
isinstance(truncate, bool) and truncate:--> 318
print(self._jdf.showString(n, 20))319 else:320
print(self._jdf.showString(n, int(truncate)))
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/java_gateway.py
in __call__(self, *args)   1131 answer =
self.gateway_client.send_command(command)   1132 return_value
= get_return_value(-> 1133 answer, self.gateway_client,
self.target_id, self.name)   11341135 for temp_arg in
temp_args:
/home/ubuntu/spark-2.1.1-bin-hadoop2.7/python/pyspark/sql/utils.py in
deco(*a, **kw) 61 def deco(*a, **kw): 62 try:--->
63 return f(*a, **kw) 64 except
py4j.protocol.Py4JJavaError as e: 65 s =
e.java_exception.toString()
/home/ubuntu/anaconda3/lib/python3.5/site-packages/py4j/protocol.py in
get_return_value(answer, gateway_client, target_id, name)325
  raise Py4JError(326 "An error occurred while
calling {0}{1}{2}".--> 327 format(target_id, ".",
name))328 else:329 type = answer[1]
Py4JError: An error occurred while calling o648.showString