Hi Laszlo,

Thanks for using the bug tracker, and not the mailing list (ML), to report issues.
Also, please avoid pasting raw text/code in emails; use a service like gist.github.com instead.

What version of sklearn are you using? ElasticNetCV uses threads (rather than
forked worker processes) in v0.15, so the memory problem should not happen anymore.

Alex


On Fri, Aug 22, 2014 at 1:28 AM, László Sándor <sand...@gmail.com> wrote:
> Hi,
>
> The OS denied me memory upon running CV in the script below. I am still
> investigating whether it was a mistake of the scheduler on the server, but I
> think the process had access to 240 GB of memory, yet it reproducibly crashes
> upon using 120035176K with the error message below. I paste my conda info
> output at the very end.
>
>
> Could it be an sklearn/NumPy/Python issue?
>
>
> Thanks!
>
>
> Laszlo
>
>
>
>
> ******* Script:
>
> #import scipy as sp
>
> import numpy as np
>
> import pandas as pd
>
> import multiprocessing as mp
>
> # import iopro
>
> from sklearn import grid_search
>
> from sklearn import cross_validation
>
> from sklearn.preprocessing import StandardScaler
>
> # from sklearn.linear_model import SGDClassifier
>
> from sklearn.linear_model import ElasticNetCV
>
> from sklearn.externals import joblib
>
> def main():
>
>     print("Started.")
>
>     # n = 10**6
>
> #    notreatadapter =
> iopro.text_adapter('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv',
> parser='csv')
>
> #    X = notreatadapter[1:][:] #[0:n]
>
> #    y = notreatadapter[0][:] #[0:n]
>
>     # notreatdata =
> pd.read_stata('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.dta')
>
>     # notreatdata = notreatdata.iloc[:10000,:]
>
>     # X = notreatdata.iloc[:,1:]
>
>     # y = notreatdata.iloc[:,0]
>
>     X =
> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv')#,nrows=833333)
>
>     y = X.iloc[:,0].values #
> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/absnt.csv',
> parser='csv',usecols=[0])
>
>     X = X.iloc[:,1:].values
>
>     n = y.shape[0]
>
>     print("Data lodaded.")
>
>     X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
> y, test_size=0.4, random_state=0)
>
>     print("Data split.")
>
>     scaler = StandardScaler()
>
>     scaler.fit(X_train)  # Don't cheat - fit only on training data
>
>     X_train = scaler.transform(X_train)
>
>     X_test = scaler.transform(X_test)  # apply same transformation to test
> data
>
>     print("Data scaled.")
>
>     # build a model
>
>
> joblib.dump(X_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX')
>
>
> joblib.dump(y_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty')
>
>     print("Data dumped.")
>
>     X_train =
> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX',
> mmap_mode='r+')
>
>     y_train =
> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty',
> mmap_mode='r+')
>
>     print("Data reloaded and mmapped.")
>
>     # model = SGDClassifier(penalty='elasticnet',n_iter = np.ceil(10**6 /
> float(n)),shuffle=True)
>
>     print("CV starts.")
>
>     en = ElasticNetCV(l1_ratio=[.05, .15, .5, .7, .9, .95, .99,
> 1],n_jobs=-1)
>
>     en.fit(X_train, y_train)
>
>     print("Best for alphas:")
>
>     print(en.alpha_)
>
>     print("Best l1-ratio:")
>
>     print(en.l1_ratio_)
>
>     print("Coefficients:")
>
>     print(en.coef_)
>
>     #evaluate
>
>     y_pred = en.predict(X_test)
>
>     test_score = r2_score(y_test, y_pred)
>
>     print("Test estimator has R^2 %2.2f in the test sample.",test_score)
>
> if __name__=='__main__':
>
>     mp.freeze_support()
>
>     main()
>
>
>
>
> *** Error:
>
> Traceback (most recent call last):
>
>   File
> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
> line 67, in <module>
>
>     main()
>
>   File
> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
> line 50, in main
>
>     en.fit(X_train, y_train)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py",
> line 1101, in fit
>
>     mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(jobs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py",
> line 604, in __call__
>
>     self._pool = MemmapingPool(n_jobs, **poolargs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
> line 559, in __init__
>
>     super(MemmapingPool, self).__init__(**poolargs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
> line 400, in __init__
>
>     super(PicklingPool, self).__init__(**poolargs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
> line 159, in __init__
>
>     self._repopulate_pool()
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
> line 223, in _repopulate_pool
>
>     w.start()
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/process.py",
> line 130, in start
>
>     self._popen = Popen(self)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/forking.py",
> line 121, in __init__
>
>     self.pid = os.fork()
>
> OSError: [Errno 12] Cannot allocate memory
>
>
>
> *** Conda:
>
> Current conda install:
>
>              platform : linux-64
>
>         conda version : 3.4.1
>
>        python version : 2.7.6.final.0
>
>      root environment : /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x
> (read only)
>
>   default environment : /n/chetty/Users/lsandor/envs/laszlo
>
>      envs directories : /n/chetty/Users/lsandor/envs
>
>
> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/envs
>
>         package cache : /n/chetty/Users/lsandor/envs/.pkgs
>
>
> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/pkgs
>
>          channel URLs : http://repo.continuum.io/pkgs/free/linux-64/
>
>                         http://repo.continuum.io/pkgs/pro/linux-64/
>
>
>           config file : None
>
>
>     is foreign system : False
>
>
>
> ------------------------------------------------------------------------------
> Slashdot TV.
> Video for Nerds.  Stuff that matters.
> http://tv.slashdot.org/
> _______________________________________________
> Scikit-learn-general mailing list
> Scikit-learn-general@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general
>

------------------------------------------------------------------------------
Slashdot TV.  
Video for Nerds.  Stuff that matters.
http://tv.slashdot.org/
_______________________________________________
Scikit-learn-general mailing list
Scikit-learn-general@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/scikit-learn-general

Reply via email to