Re: [Scikit-learn-general] ElasticnetCV crash on 64-bit Linux

John Mark Agosta Fri, 22 Aug 2014 07:50:00 -0700

Laszlo -

Which scikit-learn version are you using? The elastic net implementation
in v0.14.0 was significantly improved in v0.15.


What alpha are you using?  Sklearn is known to fail for alpha == 0.

-jm

______
John Mark Agosta
650 465-4707
[email protected]  *"Unpredictable consequences are the most
expected thing on earth."*
                                                 *--- B. Latour*


On Thu, Aug 21, 2014 at 4:28 PM, László Sándor <[email protected]> wrote:

> Hi,
>
> The OS denied me memory when I ran the CV in the script below. I am still
> investigating whether it was a mistake of the scheduler on the server, but
> I think the process had access to 240 GB of memory, yet it reproducibly
> crashes upon using 120035176K, with the error message below. I paste my
> conda info output at the very end.
>
>
> Could it be an sklearn/NumPy/Python issue?
>
>
> Thanks!
>
>
> Laszlo
>
>
>
>
> ******* Script:
>
> #import scipy as sp
>
> import numpy as np
>
> import pandas as pd
>
> import multiprocessing as mp
>
> # import iopro
>
> from sklearn import grid_search
>
> from sklearn import cross_validation
>
> from sklearn.preprocessing import StandardScaler
>
> # from sklearn.linear_model import SGDClassifier
>
> from sklearn.linear_model import ElasticNetCV
>
> from sklearn.externals import joblib
>
> def main():
>
>     print("Started.")
>
>     # n = 10**6
>
> #    notreatadapter =
> iopro.text_adapter('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv',
> parser='csv')
>
> #    X = notreatadapter[1:][:] #[0:n]
>
> #    y = notreatadapter[0][:] #[0:n]
>
>     # notreatdata =
> pd.read_stata('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.dta')
>
>     # notreatdata = notreatdata.iloc[:10000,:]
>
>     # X = notreatdata.iloc[:,1:]
>
>     # y = notreatdata.iloc[:,0]
>
>     X =
> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv')#,nrows=833333)
>
>     y = X.iloc[:,0].values #
> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/absnt.csv',
> parser='csv',usecols=[0])
>
>     X = X.iloc[:,1:].values
>
>     n = y.shape[0]
>
>     print("Data lodaded.")
>
>     X_train, X_test, y_train, y_test =
> cross_validation.train_test_split(X, y, test_size=0.4, random_state=0)
>
>     print("Data split.")
>
>     scaler = StandardScaler()
>
>     scaler.fit(X_train)  # Don't cheat - fit only on training data
>
>     X_train = scaler.transform(X_train)
>
>     X_test = scaler.transform(X_test)  # apply same transformation to test
> data
>
>     print("Data scaled.")
>
>     # build a model
>
>
> joblib.dump(X_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX')
>
>
> joblib.dump(y_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty')
>
>     print("Data dumped.")
>
>     X_train =
> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX',
> mmap_mode='r+')
>
>     y_train =
> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty',
> mmap_mode='r+')
>
>     print("Data reloaded and mmapped.")
>
>     # model = SGDClassifier(penalty='elasticnet',n_iter = np.ceil(10**6 /
> float(n)),shuffle=True)
>
>     print("CV starts.")
>
>     en = ElasticNetCV(l1_ratio=[.05, .15, .5, .7, .9, .95, .99,
> 1],n_jobs=-1)
>
>     en.fit(X_train, y_train)
>
>     print("Best for alphas:")
>
>     print(en.alpha_)
>
>     print("Best l1-ratio:")
>
>     print(en.l1_ratio_)
>
>     print("Coefficients:")
>
>     print(en.coef_)
>
>     #evaluate
>
>     y_pred = en.predict(X_test)
>
>     test_score = r2_score(y_test, y_pred)
>
>     print("Test estimator has R^2 %2.2f in the test sample.",test_score)
>
> if __name__=='__main__':
>
>     mp.freeze_support()
>
>     main()
>
>
>
>
> *** Error:
>
> Traceback (most recent call last):
>
>   File
> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
> line 67, in <module>
>
>     main()
>
>   File
> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
> line 50, in main
>
>     en.fit(X_train, y_train)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py",
> line 1101, in fit
>
>     mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(jobs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py",
> line 604, in __call__
>
>     self._pool = MemmapingPool(n_jobs, **poolargs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
> line 559, in __init__
>
>     super(MemmapingPool, self).__init__(**poolargs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
> line 400, in __init__
>
>     super(PicklingPool, self).__init__(**poolargs)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
> line 159, in __init__
>
>     self._repopulate_pool()
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
> line 223, in _repopulate_pool
>
>     w.start()
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/process.py",
> line 130, in start
>
>     self._popen = Popen(self)
>
>   File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/forking.py",
> line 121, in __init__
>
>     self.pid = os.fork()
>
> OSError: [Errno 12] Cannot allocate memory
>
>
>
> *** Conda:
>
> Current conda install:
>
>              platform : linux-64
>
>         conda version : 3.4.1
>
>        python version : 2.7.6.final.0
>
>      root environment : /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x
>  (read only)
>
>   default environment : /n/chetty/Users/lsandor/envs/laszlo
>
>      envs directories : /n/chetty/Users/lsandor/envs
>
>
> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/envs
>
>         package cache : /n/chetty/Users/lsandor/envs/.pkgs
>
>
> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/pkgs
>
>          channel URLs : http://repo.continuum.io/pkgs/free/linux-64/
>
>                         http://repo.continuum.io/pkgs/pro/linux-64/
>
>
>           config file : None
>
>
>     is foreign system : False
>
>
>
> ------------------------------------------------------------------------------
> Slashdot TV.
> Video for Nerds.  Stuff that matters.
> http://tv.slashdot.org/
> _______________________________________________
> Scikit-learn-general mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general
>
>

------------------------------------------------------------------------------
Slashdot TV.  
Video for Nerds.  Stuff that matters.
http://tv.slashdot.org/

_______________________________________________
Scikit-learn-general mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/scikit-learn-general

Re: [Scikit-learn-general] ElasticnetCV crash on 64-bit Linux

Reply via email to