The cause of the error is 'java.lang.OutOfMemoryError: Java heap space', which could be coming from pyspark/sql/utils.py > deco.
You may want to monitor the Spark UI to figure out memory usage and where the job fails. It would be helpful if you shared your setup so that we can provide further suggestions. A quick fix in this situation is to increase the driver/executor memory. > On May 12, 2017, at 6:44 AM, arijit chakraborty <ak...@hotmail.com> wrote: > > Hi, > > > > I was testing my code with 10,000 observations. But the code is failing. Please find the log below. The code is working perfectly with smaller datasets. In R it's taking around 2 hours to run this model. > > > I'm using 4 core PC and running spark through jupyter notebook. > > In python: > > --------------------------------------------------------------------------- > Py4JJavaError Traceback (most recent call last) > <ipython-input-30-9e495a76f74c> in <module>() > ----> 1 get_ipython().run_cell_magic(u'time', u'', u'scriptUrl = "D:/DEV1/DMLREPO/DEV_v2.dml" #findBestSplitSC,tester,findBestSplitSC_v1 \nscript = sml.dml(scriptUrl).input(dframe = X_df,status = status_df,input_val = inputs_df,ntree = 300, mtry = 9).output("check_func") # , status = status_df, input_val = inputs_df\nbeta = ml.execute (script).get("check_func")\n#beta') > > C:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell) > 2118 magic_arg_s = self.var_expand(line, stack_depth) > 2119 with self.builtin_trap: > -> 2120 result = fn(magic_arg_s, cell) > 2121 return result > 2122 > > <decorator-gen-61> in time(self, line, cell, local_ns) > > C:\Anaconda2\lib\site-packages\IPython\core\magic.pyc in <lambda>(f, *a, **k) > 191 # but it's overkill for just that one bit of state. 
> 192 def magic_deco(arg): > --> 193 call = lambda f, *a, **k: f(*a, **k) > 194 > 195 if callable(arg): > > C:\Anaconda2\lib\site-packages\IPython\core\magics\execution.pyc in time (self, line, cell, local_ns) > 1175 else: > 1176 st = clock2() > -> 1177 exec(code, glob, local_ns) > 1178 end = clock2() > 1179 out = None > > <timed exec> in <module>() > > C:\Anaconda2\lib\site-packages\systemml\mlcontext.pyc in execute(self, script) > 338 for val in script._output: > 339 script_java.out(val) > --> 340 return MLResults(self._ml.execute(script_java), self._sc) > 341 > 342 def setStatistics(self, statistics): > > C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py in __call__(self, *args) > 1131 answer = self.gateway_client.send_command(command) > 1132 return_value = get_return_value( > -> 1133 answer, self.gateway_client, self.target_id, self.name) > 1134 > 1135 for temp_arg in temp_args: > > C:\spark/python\pyspark\sql\utils.pyc in deco(*a, **kw) > 61 def deco(*a, **kw): > 62 try: > ---> 63 return f(*a, **kw) > 64 except py4j.protocol.Py4JJavaError as e: > 65 s = e.java_exception.toString() > > C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name) > 317 raise Py4JJavaError( > 318 "An error occurred while calling {0}{1}{2}.\n". > --> 319 format(target_id, ".", name), value) > 320 else: > 321 raise Py4JError( > > Py4JJavaError: An error occurred while calling o33.execute. 
> : java.lang.OutOfMemoryError: Java heap space > > > > The CMD Log Error: > > > ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:57055) > Traceback (most recent call last): > File "C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py", line 963, in start > self.socket.connect((self.address, self.port)) > File "C:\Anaconda2\lib\socket.py", line 228, in meth > return getattr(self._sock,name)(*args) > error: [Errno 10061] No connection could be made because the target machine actively refused it > > > > Thanks a lot! > > Arijit