Laszlo -
What Sklearn version are you using? The elastic net implementation in
v0.14.0 was significantly improved in v0.15.
What alpha are you using? Sklearn is known to fail for alpha == 0.
-jm
______
John Mark Agosta
650 465-4707
johnmark.ago...@gmail.com
*"Unpredictable consequences are the most expected thing on earth."*
*--- B. Latour*
On Thu, Aug 21, 2014 at 4:28 PM, László Sándor <sand...@gmail.com> wrote:
> Hi,
>
> OS denied me memory upon running CV in the script below. I am still
> investigating whether it was a mistake of the scheduler on the server, but
> I think the process had access to 240 GB memory but reproducibly crashes
> upon using 120035176K with the error message below. I paste my conda info
> output at the very end.
>
>
> Could it be an sklearn/NumPy/Python issue?
>
>
> Thanks!
>
>
> Laszlo
>
>
>
>
> ******* Script:
>
> #import scipy as sp
>
> import numpy as np
>
> import pandas as pd
>
> import multiprocessing as mp
>
> # import iopro
>
> from sklearn import grid_search
>
> from sklearn import cross_validation
>
> from sklearn.preprocessing import StandardScaler
>
> # from sklearn.linear_model import SGDClassifier
>
> from sklearn.linear_model import ElasticNetCV
>
> from sklearn.externals import joblib
>
> def main():
>
> print("Started.")
>
> # n = 10**6
>
> # notreatadapter =
> iopro.text_adapter('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv',
> parser='csv')
>
> # X = notreatadapter[1:][:] #[0:n]
>
> # y = notreatadapter[0][:] #[0:n]
>
> # notreatdata =
> pd.read_stata('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.dta')
>
> # notreatdata = notreatdata.iloc[:10000,:]
>
> # X = notreatdata.iloc[:,1:]
>
> # y = notreatdata.iloc[:,0]
>
> X =
> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv')#,nrows=833333)
>
> y = X.iloc[:,0].values #
> pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/absnt.csv',
> parser='csv',usecols=[0])
>
> X = X.iloc[:,1:].values
>
> n = y.shape[0]
>
> print("Data lodaded.")
>
> X_train, X_test, y_train, y_test =
> cross_validation.train_test_split(X, y, test_size=0.4, random_state=0)
>
> print("Data split.")
>
> scaler = StandardScaler()
>
> scaler.fit(X_train) # Don't cheat - fit only on training data
>
> X_train = scaler.transform(X_train)
>
> X_test = scaler.transform(X_test) # apply same transformation to test
> data
>
> print("Data scaled.")
>
> # build a model
>
>
> joblib.dump(X_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX')
>
>
> joblib.dump(y_train,'/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty')
>
> print("Data dumped.")
>
> X_train =
> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX',
> mmap_mode='r+')
>
> y_train =
> joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty',
> mmap_mode='r+')
>
> print("Data reloaded and mmapped.")
>
> # model = SGDClassifier(penalty='elasticnet',n_iter = np.ceil(10**6 /
> float(n)),shuffle=True)
>
> print("CV starts.")
>
> en = ElasticNetCV(l1_ratio=[.05, .15, .5, .7, .9, .95, .99,
> 1],n_jobs=-1)
>
> en.fit(X_train, y_train)
>
> print("Best for alphas:")
>
> print(en.alpha_)
>
> print("Best l1-ratio:")
>
> print(en.l1_ratio_)
>
> print("Coefficients:")
>
> print(en.coef_)
>
> #evaluate
>
> y_pred = en.predict(X_test)
>
> test_score = r2_score(y_test, y_pred)
>
> print("Test estimator has R^2 %2.2f in the test sample.",test_score)
>
> if __name__=='__main__':
>
> mp.freeze_support()
>
> main()
>
>
>
>
> *** Error:
>
> Traceback (most recent call last):
>
> File
> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
> line 67, in <module>
>
> main()
>
> File
> "/n/chetty/Users/lsandor/sweden/.code/controls/lasso/scikit_notreat_predictors_en.py",
> line 50, in main
>
> en.fit(X_train, y_train)
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py",
> line 1101, in fit
>
> mse_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(jobs)
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py",
> line 604, in __call__
>
> self._pool = MemmapingPool(n_jobs, **poolargs)
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
> line 559, in __init__
>
> super(MemmapingPool, self).__init__(**poolargs)
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py",
> line 400, in __init__
>
> super(PicklingPool, self).__init__(**poolargs)
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
> line 159, in __init__
>
> self._repopulate_pool()
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/pool.py",
> line 223, in _repopulate_pool
>
> w.start()
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/process.py",
> line 130, in start
>
> self._popen = Popen(self)
>
> File
> "/n/chetty/Users/lsandor/envs/laszlo/lib/python2.7/multiprocessing/forking.py",
> line 121, in __init__
>
> self.pid = os.fork()
>
> OSError: [Errno 12] Cannot allocate memory
>
>
>
> *** Conda:
>
> Current conda install:
>
> platform : linux-64
>
> conda version : 3.4.1
>
> python version : 2.7.6.final.0
>
> root environment : /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x
> (read only)
>
> default environment : /n/chetty/Users/lsandor/envs/laszlo
>
> envs directories : /n/chetty/Users/lsandor/envs
>
>
> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/envs
>
> package cache : /n/chetty/Users/lsandor/envs/.pkgs
>
>
> /n/sw/fasrcsw/apps/Core/Anaconda/1.9.2-fasrc01/x/pkgs
>
> channel URLs : http://repo.continuum.io/pkgs/free/linux-64/
>
> http://repo.continuum.io/pkgs/pro/linux-64/
>
>
> config file : None
>
>
> is foreign system : False
>
>
>
> ------------------------------------------------------------------------------
> Slashdot TV.
> Video for Nerds. Stuff that matters.
> http://tv.slashdot.org/
> _______________________________________________
> Scikit-learn-general mailing list
> Scikit-learn-general@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general
>
>
------------------------------------------------------------------------------
Slashdot TV.
Video for Nerds. Stuff that matters.
http://tv.slashdot.org/
_______________________________________________
Scikit-learn-general mailing list
Scikit-learn-general@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/scikit-learn-general