Hi,
I was testing my code with 10,000 observations, but it is failing. Please find the log below. The code works perfectly with smaller datasets. In R, this model takes around 2 hours to run. I'm using a 4-core PC and running Spark through a Jupyter notebook. In Python: --------------------------------------------------------------------------- Py4JJavaError Traceback (most recent call last) <ipython-input-30-9e495a76f74c> in <module>() ----> 1 get_ipython().run_cell_magic(u'time', u'', u'scriptUrl = "D:/DEV1/DMLREPO/DEV_v2.dml" #findBestSplitSC,tester,findBestSplitSC_v1\nscript = sml.dml(scriptUrl).input(dframe = X_df,status = status_df,input_val = inputs_df,ntree = 300, mtry = 9).output("check_func") # , status = status_df, input_val = inputs_df\nbeta = ml.execute(script).get("check_func")\n#beta') C:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell) 2118 magic_arg_s = self.var_expand(line, stack_depth) 2119 with self.builtin_trap: -> 2120 result = fn(magic_arg_s, cell) 2121 return result 2122 <decorator-gen-61> in time(self, line, cell, local_ns) C:\Anaconda2\lib\site-packages\IPython\core\magic.pyc in <lambda>(f, *a, **k) 191 # but it's overkill for just that one bit of state. 
192 def magic_deco(arg): --> 193 call = lambda f, *a, **k: f(*a, **k) 194 195 if callable(arg): C:\Anaconda2\lib\site-packages\IPython\core\magics\execution.pyc in time(self, line, cell, local_ns) 1175 else: 1176 st = clock2() -> 1177 exec(code, glob, local_ns) 1178 end = clock2() 1179 out = None <timed exec> in <module>() C:\Anaconda2\lib\site-packages\systemml\mlcontext.pyc in execute(self, script) 338 for val in script._output: 339 script_java.out(val) --> 340 return MLResults(self._ml.execute(script_java), self._sc) 341 342 def setStatistics(self, statistics): C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py in __call__(self, *args) 1131 answer = self.gateway_client.send_command(command) 1132 return_value = get_return_value( -> 1133 answer, self.gateway_client, self.target_id, self.name) 1134 1135 for temp_arg in temp_args: C:\spark/python\pyspark\sql\utils.pyc in deco(*a, **kw) 61 def deco(*a, **kw): 62 try: ---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString() C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name) 317 raise Py4JJavaError( 318 "An error occurred while calling {0}{1}{2}.\n". --> 319 format(target_id, ".", name), value) 320 else: 321 raise Py4JError( Py4JJavaError: An error occurred while calling o33.execute. : java.lang.OutOfMemoryError: Java heap space The CMD Log Error: ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:57055) Traceback (most recent call last): File "C:\spark\python\lib\py4j-0.10.4-src.zip\py4j\java_gateway.py", line 963, in start self.socket.connect((self.address, self.port)) File "C:\Anaconda2\lib\socket.py", line 228, in meth return getattr(self._sock,name)(*args) error: [Errno 10061] No connection could be made because the target machine actively refused it Thanks a lot! Arijit