import numpy as np

# NOTE(review): module paths are kept exactly as posted (sklearn.cross_validation,
# sklearn.preprocessing.Imputer); presumably they match the scikit-learn release
# in use -- confirm before running against a different version.
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.cross_validation import cross_val_score

# Seeded generator used further down to pick which entries go missing.
rng = np.random.RandomState(0)

# Load the complete data set and record its dimensions.
dataset = load_boston()
X_full, y_full = dataset.data, dataset.target
n_samples = X_full.shape[0]
n_features = X_full.shape[1]

# Baseline: mean cross-validated score of a random forest on the full data.
estimator = RandomForestRegressor(random_state=0, n_estimators=100)
score = cross_val_score(estimator, X_full, y_full).mean()
print("Score with the entire dataset = %.2f" % score)
# Add missing values in 75% of the lines

missing_rate = 0.75
# np.floor returns a float; array sizes must be integers, so cast explicitly.
n_missing_samples = int(np.floor(n_samples * missing_rate))
# Boolean row mask with exactly n_missing_samples True entries, then shuffled
# so the affected rows are spread randomly over the data set.
missing_samples = np.hstack((
    np.zeros(n_samples - n_missing_samples, dtype=bool),
    np.ones(n_missing_samples, dtype=bool),
))
rng.shuffle(missing_samples)
# One randomly chosen feature (column) index per affected row.
missing_features = rng.randint(0, n_features, n_missing_samples)
# Estimate the score without the lines containing missing values

# Keep only the rows that were NOT selected to receive a missing value.
X_filtered = X_full[~missing_samples, :]
y_filtered = y_full[~missing_samples]
estimator = RandomForestRegressor(random_state=0, n_estimators=100)
score = cross_val_score(estimator, X_filtered, y_filtered).mean()
print("Score without the samples containing missing values = %.2f" % score)
# Estimate the score after imputation of the missing values

# Work on copies so the full data set stays untouched.
X_missing = X_full.copy()
# Overwrite one feature per selected row with 0, the marker for "missing".
X_missing[np.where(missing_samples)[0], missing_features] = 0
y_missing = y_full.copy()
# NOTE(review): with missing_values=0 the imputer presumably also treats any
# genuine zeros in the data as missing -- confirm this is intended.
estimator = Pipeline([
    ("imputer", Imputer(missing_values=0, strategy="mean", axis=0)),
    ("forest", RandomForestRegressor(random_state=0, n_estimators=100)),
])
score = cross_val_score(estimator, X_missing, y_missing).mean()
print("Score after imputation of the missing values = %.2f" % score)

This all runs fine.

Now I want to predict, which should be simple, but
# cross_val_score only fits internal clones of `estimator`, so the pipeline
# object itself is still unfitted here. Without the fit() call below, predict()
# raises AttributeError: 'Imputer' object has no attribute 'statistics_'.
estimator.fit(X_missing, y_missing)
estimator.predict(X=X_filtered[1:10, :])

returns the following error:

"AttributeError: 'Imputer' object has no attribute 'statistics_'"

What is wrong here?
------------------------------------------------------------------------------
Dive into the World of Parallel Programming! The Go Parallel Website,
sponsored by Intel and developed in partnership with Slashdot Media, is your
hub for all things parallel software development, from weekly thought
leadership blogs to news, videos, case studies, tutorials and more. Take a
look and join the conversation now. http://goparallel.sourceforge.net
_______________________________________________
Scikit-learn-general mailing list
Scikit-learn-general@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/scikit-learn-general

Reply via email to