Thanks — I did not know whether it was a proper bug, and I did not dig very deep. This
happened on 0.15.0b2.

So what would be meaningful to try, and then report?

On Fri, Aug 22, 2014 at 5:41 AM, Alexandre Gramfort
<alexandre.gramf...@telecom-paristech.fr> wrote:

> hi Laszlo,
> thanks for using the bug tracker and not the ML to report issues.
> Also avoid pasting raw text/code in emails but use services like 
> gist.github.com
> what version of sklearn are you using? ElasticNetCV uses threads in v0.15
> so the memory problem should not happen anymore.
> Alex
> On Fri, Aug 22, 2014 at 1:28 AM, László Sándor <sand...@gmail.com> wrote:
>> Hi,
>>
>> OS denied me memory upon running CV in the script below. I am still
>> investigating whether it was a mistake of the scheduler on the server, but I
>> think the process had access to 240 GB memory but reproducibly crashes upon
>> using 120035176K with the error message below. I paste my conda info output
>> at the very end.
>>
>>
>> Could it be an sklearn/NumPy/Python issue?
>>
>>
>> Thanks!
>>
>>
>> Laszlo
>>
>>
>>
>>
>> ******* Script:
>>
>> #import scipy as sp
>>
>> import numpy as np
>>
>> import pandas as pd
>>
>> import multiprocessing as mp
>>
>> # import iopro
>>
>> from sklearn import grid_search
>>
>> from sklearn import cross_validation
>>
>> from sklearn.preprocessing import StandardScaler
>>
>> # from sklearn.linear_model import SGDClassifier
>>
>> from sklearn.linear_model import ElasticNetCV
>>
>> from sklearn.externals import joblib
>>
>> def main():
>>
>>     print("Started.")
>>
>>     # n = 10**6
>>
>> #    notreatadapter =
>> iopro.text_adapter('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv',
>> parser='csv')
>>
>> #    X = notreatadapter[1:][:] #[0:n]
>>
>> #    y = notreatadapter[0][:] #[0:n]
>>
>>     # notreatdata =
>> pd.read_stata('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.dta')
>>
>>     # notreatdata = notreatdata.iloc[:10000,:]
>>
>>     # X = notreatdata.iloc[:,1:]
>>
>>     # y = notreatdata.iloc[:,0]
>>
>>     X =
>> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv')#,nrows=833333)
>>
>>     y = X.iloc[:,0].values #
>> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/absnt.csv',
>> parser='csv',usecols=[0])
>>
>>     X = X.iloc[:,1:].values
>>
>>     n = y.shape[0]
>>
>>     print("Data lodaded.")
>>
>>     X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,
>> y, test_size=0.4, random_state=0)
>>
>>     print("Data split.")
>>
>>     scaler = StandardScaler()
>>
>>     scaler.fit(X_train)  # Don't cheat - fit only on training data
>>
>>     X_train = scaler.transform(X_train)
>>
>>     X_test = scaler.transform(X_test)  # apply same transformation to test
>> data
>>
>>     print("Data scaled.")
>>
>>     # build a model
>>
>>
>> joblib.dump(X_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX')
>>
>>
>> joblib.dump(y_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty')
>>
>>     print("Data dumped.")
>>
>>     X_train =
>> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX',
>> mmap_mode='r+')
>>
>>     y_train =
>> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty',
>> mmap_mode='r+')
>>
>>     print("Data reloaded and mmapped.")
>>
>>     # model = SGDClassifier(penalty='elasticnet',n_iter = np.ceil(10**6 /
>> float(n)),shuffle=True)
>>
>>     print("CV starts.")
>>
>>     en = ElasticNetCV(l1_ratio=[.05, .15, .5, .7, .9, .95, .99,
>> 1],n_jobs=-1)
>>
>>     en.fit(X_train, y_train)
>>
>>     print("Best for alphas:")
>>
>>     print(en.alpha_)
>>
>>     print("Best l1-ratio:")
>>
>>     print(en.l1_ratio_)
>>
>>     print("Coefficients:")
>>
>>     print(en.coef_)
>>
>>     #evaluate
>>
>>     y_pred = en.predict(X_test)
>>
>>     test_score = r2_score(y_test, y_pred)
>>
>>     print("Test estimator has R^2 %2.2f in the test sample.",test_score)
>>
>> if __name__=='__main__':
>>
>>     mp.freeze_support()
>>
>>     main()
>>
>>
>>
>>
>> *** Error:
>>
>> Traceback (most recent call last):
>>
>>   File
>> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
>> line 67, in <module>
>>
>>     main()
>>
>>   File
>> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
>> line 50, in main
>>
>>     en.fit(X_train, y_train)
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py",
>> line 1101, in fit
>>
>>     mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(jobs)
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py",
>> line 604, in __call__
>>
>>     self._pool = MemmapingPool(n_jobs, **poolargs)
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
>> line 559, in __init__
>>
>>     super(MemmapingPool, self).__init__(**poolargs)
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
>> line 400, in __init__
>>
>>     super(PicklingPool, self).__init__(**poolargs)
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
>> line 159, in __init__
>>
>>     self._repopulate_pool()
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
>> line 223, in _repopulate_pool
>>
>>     w.start()
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/process.py",
>> line 130, in start
>>
>>     self._popen = Popen(self)
>>
>>   File
>> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/forking.py",
>> line 121, in __init__
>>
>>     self.pid = os.fork()
>>
>> OSError: [Errno 12] Cannot allocate memory
>>
>>
>>
>> *** Conda:
>>
>> Current conda install:
>>
>>              platform : linux-64
>>
>>         conda version : 3.4.1
>>
>>        python version : 2.7.6.final.0
>>
>>      root environment : /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x
>> (read only)
>>
>>   default environment : /n/chetty/Users/lsandor/envs/laszlo
>>
>>      envs directories : /n/chetty/Users/lsandor/envs
>>
>>
>> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/envs
>>
>>         package cache : /n/chetty/Users/lsandor/envs/.pkgs
>>
>>
>> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/pkgs
>>
>>          channel URLs : http://repo.continuum.io/pkgs/free/linux-64/
>>
>>                         http://repo.continuum.io/pkgs/pro/linux-64/
>>
>>
>>           config file : None
>>
>>
>>     is foreign system : False
>>
>>
>>
>> ------------------------------------------------------------------------------
>> Slashdot TV.
>> Video for Nerds.  Stuff that matters.
>> http://tv.slashdot.org/
>> _______________________________________________
>> Scikit-learn-general mailing list
>> Scikit-learn-general@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general
>>
> ------------------------------------------------------------------------------
> Slashdot TV.  
> Video for Nerds.  Stuff that matters.
> http://tv.slashdot.org/
> _______________________________________________
> Scikit-learn-general mailing list
> Scikit-learn-general@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general
------------------------------------------------------------------------------
Slashdot TV.  
Video for Nerds.  Stuff that matters.
http://tv.slashdot.org/
_______________________________________________
Scikit-learn-general mailing list
Scikit-learn-general@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/scikit-learn-general

Reply via email to