Hi Andy, Thanks for your reply. The full traceback is below, weights.shape and the training data shape are:
(773,) (773, 82) I was using an ExtraTreeClassifier but the same thing happens with an SVC. It doesn't seem to be an estimator-specific issue. """ Traceback (most recent call last): File "/Users/jgcdesouza/Desktop/script.py", line 228, in <module> main() File "/Users/jgcdesouza/Desktop/script.py", line 195, in main search.fit(X_train, y_train) File "/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py", line 898, in fit return self._fit(X, y, sampled_params) File "/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py", line 505, in _fit for parameters in parameter_iterable File "/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 666, in __call__ self.retrieve() File "/Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 549, in retrieve raise exception_type(report) sklearn.externals.joblib.my_exceptions.JoblibValueError: JoblibValueError ___________________________________________________________________________ Multiprocessing exception: ........................................................................... /Users/jgcdesouza/Desktop/script.py in <module>() 223 wwc = calculate_weighting(ywc_train) 224 # In[ ]: 225 226 227 if __name__ == "__main__": --> 228 main() 229 230 231 232 ........................................................................... /Users/jgcdesouza/Desktop/script.py in main() 190 print weights.shape 191 print X_train.shape 192 search = RandomizedSearchCV(svc_pipe, svc_param_dist, n_iter=n_iter, scoring="accuracy", 193 n_jobs=-1, iid=True, cv=5, refit=True, verbose=1, random_state=seed, 194 fit_params={"sample_weight": weights}) --> 195 search.fit(X_train, y_train) search.fit = <bound method RandomizedSearchCV.fit of Randomiz...t=True, scoring='accuracy', verbose=1)> X_train = array([[ 20. , 20. , 1. , .... 1. , 4. , 4. 
]]) y_train = array([ 1., 1., 1., 0., 0., 0., 0., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.]) 196 print "CPS ET _79ner best params: ", search.best_params_ 197 cps_et_preds_79ner = search.predict(Xcps_test) 198 cps_et_acc_79ner = accuracy_score(ycps_test, cps_et_preds_79ner) 199 ........................................................................... /Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py in fit(self=RandomizedSearchCV(cv=5, error_score='raise', ...it=True, scoring='accuracy', verbose=1), X=array([[ 20. , 20. , 1. , .... 1. , 4. , 4. ]]), y=array([ 1., 1., 1., 0., 0., 0., 0., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.])) 893 894 """ 895 sampled_params = ParameterSampler(self.param_distributions, 896 self.n_iter, 897 random_state=self.random_state) --> 898 return self._fit(X, y, sampled_params) self._fit = <bound method RandomizedSearchCV._fit of Randomi...t=True, scoring='accuracy', verbose=1)> X = array([[ 20. , 20. , 1. , .... 1. , 4. , 4. ]]) y = array([ 1., 1., 1., 0., 0., 0., 0., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.]) sampled_params = <sklearn.grid_search.ParameterSampler object> 899 900 901 902 ........................................................................... /Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/grid_search.py in _fit(self=RandomizedSearchCV(cv=5, error_score='raise', ...it=True, scoring='accuracy', verbose=1), X=array([[ 20. , 20. , 1. , .... 1. , 4. , 4. 
]]), y=array([ 1., 1., 1., 0., 0., 0., 0., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.]), parameter_iterable=<sklearn.grid_search.ParameterSampler object>) 500 )( 501 delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_, 502 train, test, self.verbose, parameters, 503 self.fit_params, return_parameters=True, 504 error_score=self.error_score) --> 505 for parameters in parameter_iterable parameters = undefined parameter_iterable = <sklearn.grid_search.ParameterSampler object> 506 for train, test in cv) 507 508 # Out is a list of triplet: score, estimator, n_test_samples 509 n_fits = len(out) ........................................................................... /Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<itertools.islice object>) 661 if pre_dispatch == "all" or n_jobs == 1: 662 # The iterable was consumed all at once by the above for loop. 663 # No need to wait for async callbacks to trigger to 664 # consumption. 665 self._iterating = False --> 666 self.retrieve() self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)> 667 # Make sure that we get a last message telling us we are done 668 elapsed_time = time.time() - self._start_time 669 self._print('Done %3i out of %3i | elapsed: %s finished', 670 (len(self._output), --------------------------------------------------------------------------- Sub-process traceback: --------------------------------------------------------------------------- ValueError Tue Jun 9 10:43:22 2015 PID: 4756 Python 2.7.10: /Users/jgcdesouza/anaconda/bin/python ........................................................................... /Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Pipeline(steps=[('standardscaler', StandardScale...one, shrinking=True, tol=0.001, verbose=False))]), X=array([[ 20. , 20. , 1. , .... 1. , 4. , 4. 
]]), y=array([ 1., 1., 1., 0., 0., 0., 0., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.]), scorer=make_scorer(accuracy_score), train=array([149, 151, 153, 154, 155, 156, 160, 162, 1..., 765, 766, 767, 768, 769, 770, 771, 772]), test=array([ 0, 1, 2, 3, 4, 5, 6, 7, ...45, 146, 147, 148, 150, 152, 157, 158, 159, 161]), verbose=1, parameters={'svc__C': 0.033262207360466804}, fit_params={'sample_weight': array([ 0.54980595, 0.54980595, 0.54980595, 0...5, 0.54980595, 0.54980595, 0.45019405])}, return_train_score=False, return_parameters=True, error_score='raise') 1454 1455 try: 1456 if y_train is None: 1457 estimator.fit(X_train, **fit_params) 1458 else: -> 1459 estimator.fit(X_train, y_train, **fit_params) 1460 1461 except Exception as e: 1462 if error_score == 'raise': 1463 raise ........................................................................... /Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/pipeline.pyc in fit(self=Pipeline(steps=[('standardscaler', StandardScale...one, shrinking=True, tol=0.001, verbose=False))]), X=array([[ 16. , 16. , 1. , .... 1. , 4. , 4. ]]), y=array([ 1., 1., 1., 1., 1., 1., 1., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.]), **fit_params={'sample_weight': array([ 0.54980595, 0.54980595, 0.54980595, 0...5, 0.54980595, 0.54980595, 0.45019405])}) 135 pipeline. 136 y : iterable, default=None 137 Training targets. Must fulfill label requirements for all steps of 138 the pipeline. 139 """ --> 140 Xt, fit_params = self._pre_transform(X, y, **fit_params) 141 self.steps[-1][-1].fit(Xt, y, **fit_params) 142 return self 143 144 def fit_transform(self, X, y=None, **fit_params): ........................................................................... /Users/jgcdesouza/anaconda/lib/python2.7/site-packages/sklearn/pipeline.pyc in _pre_transform(self=Pipeline(steps=[('standardscaler', StandardScale...one, shrinking=True, tol=0.001, verbose=False))]), X=array([[ 16. , 16. , 1. , .... 1. , 4. , 4. 
]]), y=array([ 1., 1., 1., 1., 1., 1., 1., 0., ..., 0., 1., 1., 0., 0., 1., 1., 0.]), **fit_params={'sample_weight': array([ 0.54980595, 0.54980595, 0.54980595, 0...5, 0.54980595, 0.54980595, 0.45019405])}) 111 # Estimator interface 112 113 def _pre_transform(self, X, y=None, **fit_params): 114 fit_params_steps = dict((step, {}) for step, _ in self.steps) 115 for pname, pval in six.iteritems(fit_params): --> 116 step, param = pname.split('__', 1) 117 fit_params_steps[step][param] = pval 118 Xt = X 119 for name, transform in self.steps[:-1]: 120 if hasattr(transform, "fit_transform"): ValueError: need more than 1 value to unpack ___________________________________________________________________________ Process finished with exit code 1 """ José Guilherme On Mon, Jun 8, 2015 at 6:21 PM, Andy <t3k...@gmail.com> wrote: > Hi Jose. > That should work. > Can you provide the full traceback? > Also can you provide weights.shape? > > Andy > > On 06/08/2015 08:49 PM, José Guilherme Camargo de Souza wrote: >> Hi all, >> >> I am having a different issue when trying to use sample_weights with >> RandomizedSearchCV: >> >> weights = np.array(calculate_weighting(y_train)) >> search = RandomizedSearchCV(estimator, param_dist, n_iter=n_iter, >> scoring="accuracy", >> n_jobs=-1, iid=True, cv=5, >> refit=True, verbose=1, random_state=seed, >> fit_params={"sample_weight": >> weights}) >> >> search.fit(X_train, y_train) >> >> where weights has the same number of instances in X_train. >> I get the following error: >> >> ValueError: need more than 1 value to unpack >> >> I am using scikit-learn 0.16.1, therefore a more recent version than >> 0.15b. Was there some sort of change in the behavior of fit_params >> from 0.15b to 0.16.1? >> >> What is the current recommended way to pass the sample_weights vector >> to a *SearchCV object, if any? >> >> Thanks! >> José >> >> >> On Tue, Jul 8, 2014 at 9:33 AM, Hamed Zamani <hamedzam...@acm.org> wrote: >>> Dear Joel, >>> >>> Yes. 
After updating the version of Scikit-learn to 0.15b2 the problem was >>> solved. >>> >>> Thanks, >>> Hamed >>> >>> >>> >>> On Tue, Jul 8, 2014 at 2:51 PM, Joel Nothman <joel.noth...@gmail.com> wrote: >>>> This shouldn't be the case, though it's not altogether well-documented. >>>> According to >>>> https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/cross_validation.py#L1225, >>>> if the fit_params value has the same length as the samples, it should be >>>> similarly indexed. >>>> >>>> So this would be a bug ... if it is found at master. I'm guessing, Hamed, >>>> that you are using scikit-learn version 0.14? Please check this works with >>>> the latest 0.15b. >>>> >>>> However, fit_params will not account for the weights in the scoring >>>> function. Noel has solved this; pending some more tests, this should >>>> hopefully be merged, including support for RandomizedSearchCV(..., >>>> sample_weight=weights_array) soon. (The work seems to have stalled a >>>> little. >>>> If someone wants to see this feature included quickly, perhaps Noel would >>>> be >>>> willing for someone else to finish this PR for him.) >>>> >>>> - Joel >>>> >>>> >>>> On 8 July 2014 07:49, Kyle Kastner <kastnerk...@gmail.com> wrote: >>>>> It looks like fit_params are passed wholesale to the classifier being fit >>>>> - this means the sample weights will be a different size than the fold of >>>>> (X, y) fed to the classifier (since the weights aren't getting >>>>> KFolded...). >>>>> Unfortunately I do not see a way to accommodate for this currently - >>>>> sample_weights may be a special case where we would need to introspect the >>>>> fit_params and modify them before passing to the underlying classifier... >>>>> can you file a bug report on github? >>>>> >>>>> >>>>> On Tue, Jul 8, 2014 at 1:27 PM, Hamed Zamani <hamedzam...@acm.org> wrote: >>>>>> Dear all, >>>>>> >>>>>> I am using Scikit-Learn library and I want to weight all training >>>>>> samples (according to unbalanced data). 
According to the tutorial and >>>>>> what I >>>>>> found on the web, I should use this method: >>>>>> >>>>>> search = RandomizedSearchCV(estimator, param_distributions, >>>>>> n_iter=args.iterations, scoring=mae_scorer,n_jobs=1, refit=True, >>>>>> cv=KFold(X_train.shape[0], 10, shuffle=True, random_state=args.seed), >>>>>> verbose=1, random_state=args.seed, fit_params={'sample_weight': >>>>>> weights_array}) >>>>>> >>>>>> search.fit(X_trains, y_train) >>>>>> >>>>>> where "weights_array" is an array containing the weight of each training >>>>>> sample. After running the code, I was stopped with the following >>>>>> exception: >>>>>> >>>>>> ValueError: operands could not be broadcast together with shapes (1118,) >>>>>> (1006,) (1118,) >>>>>> >>>>>> It is worth noting that the size of "X_trains", "y_train", and >>>>>> "weights_array" are equal to 1118. >>>>>> >>>>>> When I changed the number of folds from 10 to 2, the exception was >>>>>> changed to this one: >>>>>> >>>>>> ValueError: operands could not be broadcast together with shapes (1118,) >>>>>> (559,) (1118,) >>>>>> >>>>>> Do you know what is the problem? I guess the problem is with "KFold" >>>>>> method. Any idea is appreciated. 
>>>>>> >>>>>> Kind Regards, >>>>>> Hamed >>>>>> >>>>>> >>>>>> >>>>>> ------------------------------------------------------------------------------ >>>>>> Open source business process management suite built on Java and Eclipse >>>>>> Turn processes into business applications with Bonita BPM Community >>>>>> Edition >>>>>> Quickly connect people, data, and systems into organized workflows >>>>>> Winner of BOSSIE, CODIE, OW2 and Gartner awards >>>>>> http://p.sf.net/sfu/Bonitasoft >>>>>> _______________________________________________ >>>>>> Scikit-learn-general mailing list >>>>>> Scikit-learn-general@lists.sourceforge.net >>>>>> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general >>>>>> >>>>> >>>>> >>>>> ------------------------------------------------------------------------------ >>>>> Open source business process management suite built on Java and Eclipse >>>>> Turn processes into business applications with Bonita BPM Community >>>>> Edition >>>>> Quickly connect people, data, and systems into organized workflows >>>>> Winner of BOSSIE, CODIE, OW2 and Gartner awards >>>>> http://p.sf.net/sfu/Bonitasoft >>>>> _______________________________________________ >>>>> Scikit-learn-general mailing list >>>>> Scikit-learn-general@lists.sourceforge.net >>>>> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general >>>>> >>>> >>>> >>>> ------------------------------------------------------------------------------ >>>> Open source business process management suite built on Java and Eclipse >>>> Turn processes into business applications with Bonita BPM Community >>>> Edition >>>> Quickly connect people, data, and systems into organized workflows >>>> Winner of BOSSIE, CODIE, OW2 and Gartner awards >>>> http://p.sf.net/sfu/Bonitasoft >>>> _______________________________________________ >>>> Scikit-learn-general mailing list >>>> Scikit-learn-general@lists.sourceforge.net >>>> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general >>>> >>> >>> 
------------------------------------------------------------------------------ >>> Open source business process management suite built on Java and Eclipse >>> Turn processes into business applications with Bonita BPM Community Edition >>> Quickly connect people, data, and systems into organized workflows >>> Winner of BOSSIE, CODIE, OW2 and Gartner awards >>> http://p.sf.net/sfu/Bonitasoft >>> _______________________________________________ >>> Scikit-learn-general mailing list >>> Scikit-learn-general@lists.sourceforge.net >>> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general >>> >> ------------------------------------------------------------------------------ >> _______________________________________________ >> Scikit-learn-general mailing list >> Scikit-learn-general@lists.sourceforge.net >> https://lists.sourceforge.net/lists/listinfo/scikit-learn-general > > > ------------------------------------------------------------------------------ > _______________________________________________ > Scikit-learn-general mailing list > Scikit-learn-general@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/scikit-learn-general ------------------------------------------------------------------------------ _______________________________________________ Scikit-learn-general mailing list Scikit-learn-general@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/scikit-learn-general