Once x = preprocessing.scale(df1)
is called, the input to your estimator is a NumPy array rather than a pandas DataFrame, so the ColumnTransformer cannot use strings to select columns. Thomas > On Friday, May 29, 2020 at 11:46 AM, Chamila Wijayarathna > <cdwijayarat...@gmail.com (mailto:cdwijayarat...@gmail.com)> wrote: > Hi, > > Thanks, this solution fixed the issue. However, it introduces a new error, > which was not there before. > > Traceback (most recent call last): > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\utils\__init__.py", > line 425, in _get_column_indices > all_columns = X.columns > AttributeError: 'numpy.ndarray' object has no attribute 'columns' > During handling of the above exception, another exception occurred: > Traceback (most recent call last): > File "<input>", line 1, in <module> > File "C:\Program Files\JetBrains\PyCharm > 2020.1.1\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, > in runfile > pydev_imports.execfile(filename, global_vars, local_vars) # execute the script > File "C:\Program Files\JetBrains\PyCharm > 2020.1.1\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line > 18, in execfile > exec(compile(contents+"\n", file, 'exec'), glob, loc) > File "C:/Users/ASUS/PycharmProjects/swelltest/enemble.py", line 127, in > <module> > ens.fit(x_train,y_train) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\ensemble\_voting.py", > line 265, in fit > return super().fit(X, transformed_y, sample_weight) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\ensemble\_voting.py", > line 81, in fit > for idx, clf in enumerate(clfs) if clf not in (None, 'drop') > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\parallel.py", > line 1029, in __call__ > if self.dispatch_one_batch(iterator): > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\parallel.py", > line 847, in dispatch_one_batch > self._dispatch(tasks) > File > 
"C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\parallel.py", > line 765, in _dispatch > job = self._backend.apply_async(batch, callback=cb) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\_parallel_backends.py", > line 206, in apply_async > result = ImmediateResult(func) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\_parallel_backends.py", > line 570, in __init__ > self.results = batch() > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\parallel.py", > line 253, in __call__ > for func, args, kwargs in self.items] > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\parallel.py", > line 253, in <listcomp> > for func, args, kwargs in self.items] > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\ensemble\_base.py", > line 40, in _fit_single_estimator > estimator.fit(X, y) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\pipeline.py", > line 330, in fit > Xt = self._fit(X, y, **fit_params_steps) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\pipeline.py", > line 296, in _fit > **fit_params_steps[name]) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\joblib\memory.py", > line 352, in __call__ > return self.func(*args, **kwargs) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\pipeline.py", > line 740, in _fit_transform_one > res = transformer.fit_transform(X, y, **fit_params) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\compose\_column_transformer.py", > line 529, in fit_transform > self._validate_remainder(X) > File > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\compose\_column_transformer.py", > line 327, in _validate_remainder > cols.extend(_get_column_indices(X, columns)) > File > 
"C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\utils\__init__.py", > line 427, in _get_column_indices > raise ValueError("Specifying the columns using strings is only " > ValueError: Specifying the columns using strings is only supported for pandas > DataFrames > > Thanks > On Fri, May 29, 2020 at 7:33 PM Thomas J Fan <thomasjp...@gmail.com > (mailto:thomasjp...@gmail.com)> wrote: > > VotingClassifer also needs names: > > > > ens = VotingClassifier(estimators=[('pipe1', pipe_phy), ('pipe2', pipe_fa)]) > > > > Thomas > > > > > On Friday, May 29, 2020 at 2:33 AM, Chamila Wijayarathna > > > <cdwijayarat...@gmail.com (mailto:cdwijayarat...@gmail.com)> wrote: > > > Hi all, > > > > > > I did manage to get the code to run using a workaround, which is bit ugly. > > > > > > Following is the complete stacktrace of the error I was receiving. > > > > > > Traceback (most recent call last): > > > File "<input>", line 1, in <module> > > > File "C:\Program Files\JetBrains\PyCharm > > > 2020.1.1\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line > > > 197, in runfile > > > pydev_imports.execfile(filename, global_vars, local_vars) # execute the > > > script > > > File "C:\Program Files\JetBrains\PyCharm > > > 2020.1.1\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", > > > line 18, in execfile > > > exec(compile(contents+"\n", file, 'exec'), glob, loc) > > > File "C:/Users/ASUS/PycharmProjects/swelltest/enemble.py", line 112, in > > > <module> > > > ens.fit(x_train,y_train) > > > File > > > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\ensemble\_voting.py", > > > line 265, in fit > > > return super().fit(X, transformed_y, sample_weight) > > > File > > > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\ensemble\_voting.py", > > > line 65, in fit > > > names, clfs = self._validate_estimators() > > > File > > > 
"C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\ensemble\_base.py", > > > line 228, in _validate_estimators > > > self._validate_names(names) > > > File > > > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\utils\metaestimators.py", > > > line 77, in _validate_names > > > invalid_names = [name for name in names if '__' in name] > > > File > > > "C:\Users\ASUS\PycharmProjects\swelltest\venv\lib\site-packages\sklearn\utils\metaestimators.py", > > > line 77, in <listcomp> > > > invalid_names = [name for name in names if '__' in name] > > > TypeError: argument of type 'ColumnTransformer' is not iterable > > > > > > Following are the inputs in 'names' list at the time of the error. > > > > > > 1- ColumnTransformer(transformers=[('phy', Pipeline(steps=[('imputer', > > > SimpleImputer(strategy='median')), ('scaler', StandardScaler())]), ['HR', > > > 'RMSSD', 'SCL'])]) > > > 2- > > > ColumnTransformer(transformers=[('fa',Pipeline(steps=[('imputer',SimpleImputer(strategy='median')),('scaler', > > > StandardScaler())]),['Squality', 'Sneutral', 'Shappy'])]) > > > > > > Seems like that the library is attempting to search for '__' substring of > > > the ColumnTransform object, which it is unable to perform. > > > > > > Since this name check doesn't have a signiticant effect on my > > > functionality, I commented following snippet at > > > sklearn\utils\metaestimators.py. > > > > > > invalid_names = [name for name in names if '__' in name] > > > if invalid_names: > > > raise ValueError('Estimator names must not contain __: got ' > > > '{0!r}'.format(invalid_names)) > > > > > > Please let me know if there is a better workaround or that their are any > > > issues of commenting out this code. 
> > > > > > Thanks > > > On Fri, May 29, 2020 at 10:33 AM Chamila Wijayarathna > > > <cdwijayarat...@gmail.com (mailto:cdwijayarat...@gmail.com)> wrote: > > > > Hello all, > > > > > > > > I hope I am writing to the correct mailing list about this issue that I > > > > am having. My apologies if I am not. > > > > > > > > I am attempting to use a pipeline to feed an ensemble voting classifier > > > > as I want the ensemble learner to use models that train on different > > > > feature sets. For this purpose, I followed the tutorial available at > > > > [1]. > > > > > > > > The following is the code that I have developed so far. > > > > > > > > y = df1.index > > > > x = preprocessing.scale(df1) > > > > > > > > phy_features = ['A', 'B', 'C'] > > > > phy_transformer = Pipeline(steps=[('imputer', > > > > SimpleImputer(strategy='median')), ('scaler', StandardScaler())]) > > > > phy_processer = ColumnTransformer(transformers=[('phy', > > > > phy_transformer, phy_features)]) > > > > > > > > fa_features = ['D', 'E', 'F'] > > > > fa_transformer = Pipeline(steps=[('imputer', > > > > SimpleImputer(strategy='median')), ('scaler', StandardScaler())]) > > > > fa_processer = ColumnTransformer(transformers=[('fa', fa_transformer, > > > > fa_features)]) > > > > > > > > > > > > pipe_phy = Pipeline(steps=[('preprocessor', phy_processer > > > > ),('classifier', SVM)]) > > > > pipe_fa = Pipeline(steps=[('preprocessor', fa_processer > > > > ),('classifier', SVM)]) > > > > > > > > ens = VotingClassifier(estimators=[pipe_phy, pipe_fa]) > > > > > > > > cv = KFold(n_splits=10, random_state=None, shuffle=True) > > > > for train_index, test_index in cv.split(x): > > > > x_train, x_test = x[train_index], x[test_index] > > > > y_train, y_test = y[train_index], y[test_index] > > > > ens.fit(x_train,y_train) > > > > print(ens.score(x_test, y_test)) > > > > > > > > However, when running the code, I am getting an error saying TypeError: > > > > argument of type 'ColumnTransformer' is not iterable, at 
the line > > > > ens.fit(x_train,y_train). > > > > > > > > What is the reason for this and how can I fix it? > > > > > > > > Thank you, > > > > Chamila > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > -- > > > Chamila Dilshan Wijayarathna, > > > PhD Research Student > > > The University of New South Wales (UNSW Canberra) > > > Australian Centre for Cyber Security > > > Australian Defence Force Academy > > > PO Box 7916, Canberra BA ACT 2610 > > > Australia > > > Mobile:(+61)416895795 > > > > > > > > > _______________________________________________ > > > scikit-learn mailing list > > > scikit-learn@python.org (mailto:scikit-learn@python.org) > > > https://mail.python.org/mailman/listinfo/scikit-learn > > _______________________________________________ > > scikit-learn mailing list > > scikit-learn@python.org (mailto:scikit-learn@python.org) > > https://mail.python.org/mailman/listinfo/scikit-learn > > > -- > Chamila Dilshan Wijayarathna, > PhD Research Student > The University of New South Wales (UNSW Canberra) > Australian Centre for Cyber Security > Australian Defence Force Academy > PO Box 7916, Canberra BA ACT 2610 > Australia > Mobile:(+61)416895795 > > > _______________________________________________ > scikit-learn mailing list > scikit-learn@python.org > https://mail.python.org/mailman/listinfo/scikit-learn
_______________________________________________ scikit-learn mailing list scikit-learn@python.org https://mail.python.org/mailman/listinfo/scikit-learn