Hi,


I was testing my code with 10,000 observations, but the code is failing. Please
find the log below. The code works perfectly with smaller datasets. In R,
it takes around 2 hours to run this model.


I'm using a 4-core PC and running Spark through a Jupyter notebook.

In Python:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-30-9e495a76f74c> in <module>()
----> 1 get_ipython().run_cell_magic(u'time', u'', u'scriptUrl = 
"D:/DEV1/DMLREPO/DEV_v2.dml"  
#findBestSplitSC,tester,findBestSplitSC_v1\nscript = 
sml.dml(scriptUrl).input(dframe = X_df,status = status_df,input_val = 
inputs_df,ntree = 300, mtry = 9).output("check_func")       # , status = 
status_df, input_val = inputs_df\nbeta = 
ml.execute(script).get("check_func")\n#beta')

C:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.pyc in 
run_cell_magic(self, magic_name, line, cell)
   2118             magic_arg_s = self.var_expand(line, stack_depth)
   2119             with self.builtin_trap:
-> 2120                 result = fn(magic_arg_s, cell)
   2121             return result
   2122

<decorator-gen-61> in time(self, line, cell, local_ns)

C:\Anaconda2\lib\site-packages\IPython\core\magic.pyc in <lambda>(f, *a, **k)
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
    194
    195         if callable(arg):

C:\Anaconda2\lib\site-packages\IPython\core\magics\execution.pyc in time(self, 
line, cell, local_ns)
   1175         else:
   1176             st = clock2()
-> 1177             exec(code, glob, local_ns)
   1178             end = clock2()
   1179             out = None

<timed exec> in <module>()

C:\Anaconda2\lib\site-packages\systemml\mlcontext.pyc in execute(self, script)
    338         for val in script._output:
    339             script_java.out(val)
--> 340         return MLResults(self._ml.execute(script_java), self._sc)
    341
    342     def setStatistics(self, statistics):

C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py in __call__(self, 
*args)
   1131         answer = self.gateway_client.send_command(command)
   1132         return_value = get_return_value(
-> 1133             answer, self.gateway_client, self.target_id, self.name)
   1134
   1135         for temp_arg in temp_args:

C:\spark/python\pyspark\sql\utils.pyc in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\protocol.py in 
get_return_value(answer, gateway_client, target_id, name)
    317                 raise Py4JJavaError(
    318                     "An error occurred while calling {0}{1}{2}.\n".
--> 319                     format(target_id, ".", name), value)
    320             else:
    321                 raise Py4JError(

Py4JJavaError: An error occurred while calling o33.execute.
: java.lang.OutOfMemoryError: Java heap space



The CMD Log Error:


ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java 
server (127.0.0.1:57055)
Traceback (most recent call last):
  File "C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py", line 
963, in start
    self.socket.connect((self.address, self.port))
  File "C:\Anaconda2\lib\socket.py", line 228, in meth
    return getattr(self._sock,name)(*args)
error: [Errno 10061] No connection could be made because the target machine 
actively refused it



Thanks a lot!

Arijit

Reply via email to