Hi Andy,
Thanks for your reply. The full traceback is below; weights.shape and
the training data shape are:
(773,)
(773, 82)
I was using an ExtraTreeClassifier, but the same thing happens with an
SVC, so it doesn't seem to be an estimator-specific issue.
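For reference, here is a minimal sketch of the kind of setup that triggers
it. The data, weights, and parameter distribution below are simplified
stand-ins, not my actual script:

import numpy as np
from scipy.stats import expon
from sklearn.datasets import make_classification
from sklearn.grid_search import RandomizedSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Toy data standing in for my 773 x 82 training set
X_train, y_train = make_classification(n_samples=773, n_features=82,
                                       random_state=0)
# Stand-in for calculate_weighting(): one weight per training sample
weights = np.ones(X_train.shape[0])

# Pipeline with the same step names as in the traceback below
svc_pipe = make_pipeline(StandardScaler(), SVC())
svc_param_dist = {"svc__C": expon(scale=1.0)}

search = RandomizedSearchCV(svc_pipe, svc_param_dist, n_iter=5,
                            scoring="accuracy", iid=True, cv=5, refit=True,
                            verbose=1, random_state=0,
                            fit_params={"sample_weight": weights})
# The next line fails with: ValueError: need more than 1 value to unpack
search.fit(X_train, y_train)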
"""
Traceback (most recent call last):
File "/Users/jgcdesouza/Desktop/script.py",
line 228, in <module>
main()
File "/Users/jgcdesouza/Desktop/script.py",
line 195, in main
search.fit(X_train, y_train)
File
"/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py",
line 898, in fit
return self._fit(X, y, sampled_params)
File
"/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py",
line 505, in _fit
for parameters in parameter_iterable
File
"/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py",
line 666, in __call__
self.retrieve()
File
"/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py",
line 549, in retrieve
raise exception_type(report)
sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/jgcdesouza/Desktop/script.py
in <module>()
223 wwc = calculate_weighting(ywc_train)
224 # In[ ]:
225
226
227 if __name__ == "__main__":
--> 228 main()
229
230
231
232
...........................................................................
/Users/jgcdesouza/Desktop/script.py
in main()
190 print weights.shape
191 print X_train.shape
192 search = RandomizedSearchCV(svc_pipe,
svc_param_dist, n_iter=n_iter, scoring="accuracy",
193 n_jobs=-1,
iid=True, cv=5, refit=True, verbose=1, random_state=seed,
194
fit_params={"sample_weight": weights})
--> 195 search.fit(X_train, y_train)
search.fit = <bound method RandomizedSearchCV.fit
of Randomiz...t=True,
scoring='accuracy', verbose=1)>
X_train = array([[ 20. , 20. , 1. ,
.... 1. ,
4. , 4. ]])
y_train = array([ 1., 1., 1., 0., 0., 0., 0., 0.,
..., 0., 1.,
1., 0., 0., 1., 1., 0.])
196 print "CPS ET _79ner best params: ",
search.best_params_
197 cps_et_preds_79ner = search.predict(Xcps_test)
198 cps_et_acc_79ner = accuracy_score(ycps_test, cps_et_preds_79ner)
199
...........................................................................
/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py
in fit(self=RandomizedSearchCV(cv=5, error_score='raise',
...it=True,
scoring='accuracy', verbose=1), X=array([[ 20. , 20.
, 1. , .... 1. ,
4. , 4. ]]), y=array([ 1., 1., 1., 0., 0.,
0., 0., 0., ..., 0., 1.,
1., 0., 0., 1., 1., 0.]))
893
894 """
895 sampled_params = ParameterSampler(self.param_distributions,
896 self.n_iter,
897
random_state=self.random_state)
--> 898 return self._fit(X, y, sampled_params)
self._fit = <bound method RandomizedSearchCV._fit of Randomi...t=True,
scoring='accuracy', verbose=1)>
X = array([[ 20. , 20. , 1. , .... 1. ,
4. , 4. ]])
y = array([ 1., 1., 1., 0., 0., 0., 0., 0., ..., 0., 1.,
1., 0., 0., 1., 1., 0.])
sampled_params = <sklearn.grid_search.ParameterSampler object>
899
900
901
902
...........................................................................
/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py
in _fit(self=RandomizedSearchCV(cv=5, error_score='raise',
...it=True,
scoring='accuracy', verbose=1), X=array([[ 20. , 20.
, 1. , .... 1. ,
4. , 4. ]]), y=array([ 1., 1., 1., 0., 0.,
0., 0., 0., ..., 0., 1.,
1., 0., 0., 1., 1., 0.]),
parameter_iterable=<sklearn.grid_search.ParameterSampler object>)
500 )(
501 delayed(_fit_and_score)(clone(base_estimator), X,
y, self.scorer_,
502 train, test, self.verbose,
parameters,
503 self.fit_params,
return_parameters=True,
504 error_score=self.error_score)
--> 505 for parameters in parameter_iterable
parameters = undefined
parameter_iterable = <sklearn.grid_search.ParameterSampler object>
506 for train, test in cv)
507
508 # Out is a list of triplet: score, estimator, n_test_samples
509 n_fits = len(out)
...........................................................................
/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py
in __call__(self=Parallel(n_jobs=-1), iterable=<itertools.islice
object>)
661 if pre_dispatch == "all" or n_jobs == 1:
662 # The iterable was consumed all at once by the
above for loop.
663 # No need to wait for async callbacks to trigger to
664 # consumption.
665 self._iterating = False
--> 666 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
667 # Make sure that we get a last message telling us
we are done
668 elapsed_time = time.time() - self._start_time
669 self._print('Done %3i out of %3i | elapsed: %s finished',
670 (len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError Tue Jun 9 10:43:22 2015
PID: 4756 Python 2.7.10: /Users/jgcdesouza/anaconda/bin/python
...........................................................................
/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc
in _fit_and_score(estimator=Pipeline(steps=[('standardscaler',
StandardScale...one, shrinking=True, tol=0.001, verbose=False))]),
X=array([[ 20. , 20. , 1. , .... 1. ,
4. , 4. ]]), y=array([ 1., 1., 1., 0., 0.,
0., 0., 0., ..., 0., 1.,
1., 0., 0., 1., 1., 0.]),
scorer=make_scorer(accuracy_score), train=array([149, 151, 153, 154,
155, 156, 160, 162, 1..., 765,
766, 767, 768, 769, 770, 771, 772]), test=array([ 0, 1, 2,
3, 4, 5, 6, 7, ...45, 146, 147, 148, 150, 152, 157, 158,
159, 161]), verbose=1, parameters={'svc__C': 0.033262207360466804},
fit_params={'sample_weight': array([ 0.54980595, 0.54980595,
0.54980595, 0...5,
0.54980595, 0.54980595, 0.45019405])},
return_train_score=False, return_parameters=True, error_score='raise')
1454
1455 try:
1456 if y_train is None:
1457 estimator.fit(X_train, **fit_params)
1458 else:
-> 1459 estimator.fit(X_train, y_train, **fit_params)
1460
1461 except Exception as e:
1462 if error_score == 'raise':
1463 raise
...........................................................................
/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/pipeline.pyc
in fit(self=Pipeline(steps=[('standardscaler', StandardScale...one,
shrinking=True, tol=0.001, verbose=False))]), X=array([[ 16. ,
16. , 1. , .... 1. ,
4. , 4. ]]), y=array([ 1., 1., 1., 1., 1.,
1., 1., 0., ..., 0.,
1., 1., 0., 0., 1., 1., 0.]),
**fit_params={'sample_weight': array([ 0.54980595, 0.54980595,
0.54980595, 0...5,
0.54980595, 0.54980595, 0.45019405])})
135 pipeline.
136 y : iterable, default=None
137 Training targets. Must fulfill label requirements
for all steps of
138 the pipeline.
139 """
--> 140 Xt, fit_params = self._pre_transform(X, y, **fit_params)
141 self.steps[-1][-1].fit(Xt, y, **fit_params)
142 return self
143
144 def fit_transform(self, X, y=None, **fit_params):
...........................................................................
/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/pipeline.pyc
in _pre_transform(self=Pipeline(steps=[('standardscaler',
StandardScale...one, shrinking=True, tol=0.001, verbose=False))]),
X=array([[ 16. , 16. , 1. , .... 1. ,
4. , 4. ]]), y=array([ 1., 1., 1., 1., 1.,
1., 1., 0., ..., 0.,
1., 1., 0., 0., 1., 1., 0.]),
**fit_params={'sample_weight': array([ 0.54980595, 0.54980595,
0.54980595, 0...5,
0.54980595, 0.54980595, 0.45019405])})
111 # Estimator interface
112
113 def _pre_transform(self, X, y=None, **fit_params):
114 fit_params_steps = dict((step, {}) for step, _ in self.steps)
115 for pname, pval in six.iteritems(fit_params):
--> 116 step, param = pname.split('__', 1)
117 fit_params_steps[step][param] = pval
118 Xt = X
119 for name, transform in self.steps[:-1]:
120 if hasattr(transform, "fit_transform"):
ValueError: need more than 1 value to unpack
___________________________________________________________________________
Process finished with exit code 1
"""
José Guilherme
On Mon, Jun 8, 2015 at 6:21 PM, Andy <[email protected]> wrote:
> Hi Jose.
> That should work.
> Can you provide the full traceback?
> Also can you provide weights.shape?
>
> Andy
>
> On 06/08/2015 08:49 PM, José Guilherme Camargo de Souza wrote:
>> Hi all,
>>
>> I am having a different issue when trying to use sample_weights with
>> RandomizedSearchCV:
>>
>> weights = np.array(calculate_weighting(y_train))
>> search = RandomizedSearchCV(estimator, param_dist, n_iter=n_iter,
>>                             scoring="accuracy", n_jobs=-1, iid=True,
>>                             cv=5, refit=True, verbose=1,
>>                             random_state=seed,
>>                             fit_params={"sample_weight": weights})
>>
>> search.fit(X_train, y_train)
>>
>> where weights has the same number of instances as X_train.
>> I get the following error:
>>
>> ValueError: need more than 1 value to unpack
>>
>> I am using scikit-learn 0.16.1, which is more recent than 0.15b. Was
>> there some sort of change in the behavior of fit_params from 0.15b to
>> 0.16.1?
>>
>> What is the current recommended way to pass the sample_weights vector
>> to a *SearchCV object, if any?
>>
>> Thanks!
>> José
>>
>>
>> On Tue, Jul 8, 2014 at 9:33 AM, Hamed Zamani <[email protected]> wrote:
>>> Dear Joel,
>>>
>>> Yes. After updating scikit-learn to 0.15b2, the problem was solved.
>>>
>>> Thanks,
>>> Hamed
>>>
>>>
>>>
>>> On Tue, Jul 8, 2014 at 2:51 PM, Joel Nothman <[email protected]> wrote:
>>>> This shouldn't be the case, though it's not altogether well-documented.
>>>> According to
>>>> https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/cross_validation.py#L1225,
>>>> if the fit_params value has the same length as the samples, it should be
>>>> similarly indexed.
>>>>
>>>> So this would be a bug ... if it occurs at master. I'm guessing, Hamed,
>>>> that you are using scikit-learn version 0.14? Please check whether this
>>>> works with the latest 0.15b.
>>>>
>>>> However, fit_params will not account for the weights in the scoring
>>>> function. Noel has solved this; pending some more tests, this should
>>>> hopefully be merged, including support for
>>>> RandomizedSearchCV(..., sample_weight=weights_array) soon. (The work
>>>> seems to have stalled a little. If someone wants to see this feature
>>>> included quickly, perhaps Noel would be willing for someone else to
>>>> finish this PR for him.)
>>>>
>>>> - Joel
>>>>
>>>>
>>>> On 8 July 2014 07:49, Kyle Kastner <[email protected]> wrote:
>>>>> It looks like fit_params are passed wholesale to the classifier being
>>>>> fit - this means the sample weights will be a different size than the
>>>>> fold of (X, y) fed to the classifier (since the weights aren't getting
>>>>> KFolded...). Unfortunately I do not see a way to accommodate this
>>>>> currently - sample_weights may be a special case where we would need
>>>>> to introspect the fit_params and modify them before passing to the
>>>>> underlying classifier... Can you file a bug report on GitHub?
>>>>>
>>>>>
>>>>> On Tue, Jul 8, 2014 at 1:27 PM, Hamed Zamani <[email protected]> wrote:
>>>>>> Dear all,
>>>>>>
>>>>>> I am using the scikit-learn library and I want to weight all training
>>>>>> samples (to account for unbalanced data). According to the tutorial
>>>>>> and what I found on the web, I should use this method:
>>>>>>
>>>>>> search = RandomizedSearchCV(estimator, param_distributions,
>>>>>>                             n_iter=args.iterations, scoring=mae_scorer,
>>>>>>                             n_jobs=1, refit=True,
>>>>>>                             cv=KFold(X_train.shape[0], 10, shuffle=True,
>>>>>>                                      random_state=args.seed),
>>>>>>                             verbose=1, random_state=args.seed,
>>>>>>                             fit_params={'sample_weight': weights_array})
>>>>>>
>>>>>> search.fit(X_trains, y_train)
>>>>>>
>>>>>> where "wights_array" is an array containing the weight of each training
>>>>>> sample. After running the code, I was stopped with the following
>>>>>> exception:
>>>>>>
>>>>>> ValueError: operands could not be broadcast together with shapes (1118,)
>>>>>> (1006,) (1118,)
>>>>>>
>>>>>> It is worth noting that the sizes of "X_trains", "y_train", and
>>>>>> "weights_array" are all equal to 1118.
>>>>>>
>>>>>> When I changed the number of folds from 10 to 2, the exception
>>>>>> changed to this one:
>>>>>>
>>>>>> ValueError: operands could not be broadcast together with shapes (1118,)
>>>>>> (559,) (1118,)
>>>>>>
>>>>>> Do you know what the problem is? I guess the problem is with the
>>>>>> "KFold" method. Any ideas are appreciated.
>>>>>>
>>>>>> Kind Regards,
>>>>>> Hamed