Oh, Mailbox failed to send the attachment — sorry, here it is.
On Fri, Aug 22, 2014 at 8:43 AM, László Sándor <sand...@gmail.com> wrote:
> Sorry, Olivier, I am not sure I follow. In any case, I am happy to update
> skl, I was only using the latest 64-bit Linux from the conda distro (though
> using MKL wanted to downgrade me to 0.14, I am pretty sure I precluded
> that).
>
> That said, maybe it’s good I did not file a proper bug report: The job
> completes with specifying n_jobs = 8 [the Windows test in July was an
> 8-core server], while n_jobs = -1 might have meant that the 64 cores it was
> supposed to find blew the job out of memory. I am still not sure why this
> happened when the job currently occupied 120 GB out of 240, but this might
> not be a scikit-bug after all.
>
>
> On Fri, Aug 22, 2014 at 8:37 AM, Olivier Grisel <olivier.gri...@ensta.org>
> wrote:
>
>> Thread based enet CV has been merged after the 0.15 release.
>>
>
>
#import scipy as sp
import numpy as np
import pandas as pd
import multiprocessing as mp
# import iopro
from sklearn import grid_search
from sklearn import cross_validation
from sklearn.preprocessing import StandardScaler
# from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import ElasticNetCV
from sklearn.externals import joblib
def main():
    """Fit a cross-validated elastic net on a large on-disk dataset.

    Reads a CSV whose first column is the target and whose remaining
    columns are the features, standardizes the features using statistics
    fit on the training split only, round-trips the training arrays
    through joblib so the CV worker processes can memory-map them instead
    of copying them, then fits ElasticNetCV and reports the selected
    hyper-parameters and the held-out R^2.
    """
    # Function-scope import: the original script used r2_score without
    # importing it, which raised NameError at the final scoring step.
    from sklearn.metrics import r2_score

    print("Started.")
    X = pd.read_csv('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreat.csv')
    y = X.iloc[:, 0].values   # first column is the outcome
    X = X.iloc[:, 1:].values  # remaining columns are the regressors
    print("Data loaded.")     # fixed typo ("lodaded")
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.4, random_state=0)
    print("Data split.")
    scaler = StandardScaler()
    scaler.fit(X_train)  # Don't cheat - fit only on training data
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)  # apply same transformation to test data
    print("Data scaled.")
    # Dump + reload with mmap so the n_jobs worker processes share one
    # on-disk copy of the training data instead of pickling it per worker.
    joblib.dump(X_train, '/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX')
    joblib.dump(y_train, '/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty')
    print("Data dumped.")
    X_train = joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreatX', mmap_mode='r+')
    y_train = joblib.load('/n/regal/chetty_lab/lsandor/.sweden/T/data/controls/notreaty', mmap_mode='r+')
    print("Data reloaded and mmapped.")
    print("CV starts.")
    en = ElasticNetCV(l1_ratio=[.05, .15, .5, .7, .9, .95, .99, 1], n_jobs=8)
    en.fit(X_train, y_train)
    print("Best for alphas:")
    print(en.alpha_)
    print("Best l1-ratio:")
    print(en.l1_ratio_)
    print("Coefficients:")
    print(en.coef_)
    # Evaluate on the held-out split. The original passed test_score as a
    # second argument to print(), so the %2.2f placeholder was never filled.
    y_pred = en.predict(X_test)
    test_score = r2_score(y_test, y_pred)
    print("Test estimator has R^2 %2.2f in the test sample." % test_score)
if __name__ == '__main__':
    # freeze_support() is a no-op except in frozen Windows executables,
    # where it is required before spawning multiprocessing workers
    # (sklearn's n_jobs uses multiprocessing under the hood).
    mp.freeze_support()
    main()
------------------------------------------------------------------------------
Slashdot TV.
Video for Nerds. Stuff that matters.
http://tv.slashdot.org/
_______________________________________________
Scikit-learn-general mailing list
Scikit-learn-general@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/scikit-learn-general