Hello community,
here is the log from the commit of package python-sklearn-pandas for
openSUSE:Factory checked in at 2018-09-04 22:56:28
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-sklearn-pandas (Old)
and /work/SRC/openSUSE:Factory/.python-sklearn-pandas.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-sklearn-pandas"
Tue Sep 4 22:56:28 2018 rev:2 rq:632817 version:1.7.0
Changes:
--------
---
/work/SRC/openSUSE:Factory/python-sklearn-pandas/python-sklearn-pandas.changes
2018-06-15 14:37:27.239702383 +0200
+++
/work/SRC/openSUSE:Factory/.python-sklearn-pandas.new/python-sklearn-pandas.changes
2018-09-04 22:56:29.393066440 +0200
@@ -1,0 +2,14 @@
+Sun Sep 2 16:30:43 UTC 2018 - [email protected]
+
+- specfile:
+ * remove devel from noarch
+ * be more specific in %files section
+
+- update to version 1.7.0:
+ * Fix issues with unicode names in get_names (#160).
+ * Update to build using numpy==1.14 and python==3.6 (#154).
+ * Add strategy and replacement parameters to CategoricalImputer to
+ allow imputing with values other than the mode (#144), (#161).
+ * Preserve input data types when no transform is supplied (#138).
+
+-------------------------------------------------------------------
Old:
----
sklearn-pandas-1.6.0.tar.gz
New:
----
sklearn-pandas-1.7.0.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-sklearn-pandas.spec ++++++
--- /var/tmp/diff_new_pack.QnTBTq/_old 2018-09-04 22:56:30.097068844 +0200
+++ /var/tmp/diff_new_pack.QnTBTq/_new 2018-09-04 22:56:30.101068858 +0200
@@ -18,17 +18,21 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-sklearn-pandas
-Version: 1.6.0
+Version: 1.7.0
Release: 0
Summary: Pandas integration with sklearn
-License: Zlib and BSD-2-Clause
+License: Zlib AND BSD-2-Clause
Group: Development/Languages/Python
-Url: https://github.com/paulgb/sklearn-pandas
+URL: https://github.com/paulgb/sklearn-pandas
Source:
https://files.pythonhosted.org/packages/source/s/sklearn-pandas/sklearn-pandas-%{version}.tar.gz
-BuildRequires: %{python_module devel}
BuildRequires: %{python_module setuptools}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
+Requires: python-numpy >= 1.6.1
+Requires: python-pandas >= 0.11.0
+Requires: python-scikit-learn >= 0.15.0
+Requires: python-scipy >= 0.14
+BuildArch: noarch
# SECTION test requirements
BuildRequires: %{python_module mock}
BuildRequires: %{python_module numpy >= 1.6.1}
@@ -37,12 +41,6 @@
BuildRequires: %{python_module scikit-learn >= 0.15.0}
BuildRequires: %{python_module scipy >= 0.14}
# /SECTION
-Requires: python-numpy >= 1.6.1
-Requires: python-pandas >= 0.11.0
-Requires: python-scikit-learn >= 0.15.0
-Requires: python-scipy >= 0.14
-BuildArch: noarch
-
%python_subpackages
%description
@@ -62,6 +60,6 @@
%files %{python_files}
%doc README.rst
%license LICENSE
-%{python_sitelib}/*
+%{python_sitelib}/sklearn_pandas*
%changelog
++++++ sklearn-pandas-1.6.0.tar.gz -> sklearn-pandas-1.7.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/sklearn-pandas-1.6.0/PKG-INFO
new/sklearn-pandas-1.7.0/PKG-INFO
--- old/sklearn-pandas-1.6.0/PKG-INFO 2017-10-28 15:46:31.000000000 +0200
+++ new/sklearn-pandas-1.7.0/PKG-INFO 2018-08-15 14:16:05.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: sklearn-pandas
-Version: 1.6.0
+Version: 1.7.0
Summary: Pandas integration with sklearn
Home-page: https://github.com/paulgb/sklearn-pandas
Author: Israel Saeta Pérez
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/sklearn-pandas-1.6.0/README.rst
new/sklearn-pandas-1.7.0/README.rst
--- old/sklearn-pandas-1.6.0/README.rst 2017-10-28 15:45:16.000000000 +0200
+++ new/sklearn-pandas-1.7.0/README.rst 2018-08-15 14:15:41.000000000 +0200
@@ -57,7 +57,7 @@
>>> data = pd.DataFrame({'pet': ['cat', 'dog', 'dog', 'fish', 'cat',
'dog', 'cat', 'fish'],
... 'children': [4., 6, 3, 3, 2, 3, 5, 4],
- ... 'salary': [90, 24, 44, 27, 32, 59, 36, 27]})
+ ... 'salary': [90., 24, 44, 27, 32, 59, 36, 27]})
Transformation Mapping
----------------------
@@ -106,7 +106,7 @@
>>> sample = pd.DataFrame({'pet': ['cat'], 'children': [5.]})
>>> np.round(mapper.transform(sample), 2)
- array([[ 1. , 0. , 0. , 1.04]])
+ array([[1. , 0. , 0. , 1.04]])
Output features names
@@ -190,14 +190,14 @@
... ], df_out=True)
>>> np.round(mapper_df.fit_transform(data.copy()), 2)
pet_cat pet_dog pet_fish children
- 0 1.0 0.0 0.0 0.21
- 1 0.0 1.0 0.0 1.88
- 2 0.0 1.0 0.0 -0.63
- 3 0.0 0.0 1.0 -0.63
- 4 1.0 0.0 0.0 -1.46
- 5 0.0 1.0 0.0 -0.63
- 6 1.0 0.0 0.0 1.04
- 7 0.0 0.0 1.0 0.21
+ 0 1 0 0 0.21
+ 1 0 1 0 1.88
+ 2 0 1 0 -0.63
+ 3 0 0 1 -0.63
+ 4 1 0 0 -1.46
+ 5 0 1 0 -0.63
+ 6 1 0 0 1.04
+ 7 0 0 1 0.21
The names for the columns are the same ones present in the
``transformed_names_``
attribute.
@@ -251,14 +251,14 @@
... ('children', None)
... ])
>>> np.round(mapper3.fit_transform(data.copy()))
- array([[ 1., 0., 0., 4.],
- [ 0., 1., 0., 6.],
- [ 0., 1., 0., 3.],
- [ 0., 0., 1., 3.],
- [ 1., 0., 0., 2.],
- [ 0., 1., 0., 3.],
- [ 1., 0., 0., 5.],
- [ 0., 0., 1., 4.]])
+ array([[1., 0., 0., 4.],
+ [0., 1., 0., 6.],
+ [0., 1., 0., 3.],
+ [0., 0., 1., 3.],
+ [1., 0., 0., 2.],
+ [0., 1., 0., 3.],
+ [1., 0., 0., 5.],
+ [0., 0., 1., 4.]])
Applying a default transformer
******************************
@@ -329,11 +329,11 @@
... 'col3': [0, 0, 0, None, None]
... })
>>> mapper6.fit_transform(data6)
- array([[ 1., 1., 0.],
- [ 1., 0., 0.],
- [ 1., 1., 0.],
- [ 2., 1., 0.],
- [ 3., 1., 0.]])
+ array([[1., 1., 0.],
+ [1., 0., 0.],
+ [1., 1., 0.],
+ [2., 1., 0.],
+ [3., 1., 0.]])
Feature selection and other supervised transformations
@@ -344,14 +344,14 @@
>>> from sklearn.feature_selection import SelectKBest, chi2
>>> mapper_fs = DataFrameMapper([(['children','salary'], SelectKBest(chi2,
k=1))])
>>> mapper_fs.fit_transform(data[['children','salary']], data['pet'])
- array([[ 90.],
- [ 24.],
- [ 44.],
- [ 27.],
- [ 32.],
- [ 59.],
- [ 36.],
- [ 27.]])
+ array([[90.],
+ [24.],
+ [44.],
+ [27.],
+ [32.],
+ [59.],
+ [36.],
+ [27.]])
Working with sparse features
****************************
@@ -385,11 +385,11 @@
**********************
Since the ``scikit-learn`` ``Imputer`` transformer currently only works with
-numbers, ``sklearn-pandas`` provides an equivalent helper transformer that do
-work with strings, substituting null values with the most frequent value in
-that column.
+numbers, ``sklearn-pandas`` provides an equivalent helper transformer that
+works with strings, substituting null values with the most frequent value in
+that column. Alternatively, you can specify a fixed value to use.
-Example:
+Example: imputing with the mode:
>>> from sklearn_pandas import CategoricalImputer
>>> data = np.array(['a', 'b', 'b', np.nan], dtype=object)
@@ -397,10 +397,27 @@
>>> imputer.fit_transform(data)
array(['a', 'b', 'b', 'b'], dtype=object)
+Example: imputing with a fixed value:
+
+ >>> from sklearn_pandas import CategoricalImputer
+ >>> data = np.array(['a', 'b', 'b', np.nan], dtype=object)
+ >>> imputer = CategoricalImputer(strategy='fixed_value', replacement='a')
+ >>> imputer.fit_transform(data)
+ array(['a', 'b', 'b', 'a'], dtype=object)
+
Changelog
---------
+1.7.0 (2018-08-15)
+******************
+* Fix issues with unicode names in ``get_names`` (#160).
+* Update to build using ``numpy==1.14`` and ``python==3.6`` (#154).
+* Add ``strategy`` and ``replacement`` parameters to ``CategoricalImputer`` to
allow imputing
+ with values other than the mode (#144).
+* Preserve input data types when no transform is supplied (#138).
+
+
1.6.0 (2017-10-28)
******************
* Add column name to exception during fit/transform (#110).
@@ -477,7 +494,9 @@
Other contributors:
+* Ariel Rossanigo (@arielrossanigo)
* Arnau Gil Amat (@arnau126)
+* Assaf Ben-David (@AssafBenDavid)
* Cal Paterson (@calpaterson)
* @defvorfu
* Gustavo Sena Mafra (@gsmafra)
@@ -486,6 +505,8 @@
* Jimmy Wan (@jimmywan)
* Olivier Grisel (@ogrisel)
* Paul Butler (@paulgb)
+* Richard Miller (@rwjmiller)
* Ritesh Agrawal (@ragrawal)
+* Timothy Sweetser (@hacktuarial)
* Vitaley Zaretskey (@vzaretsk)
* Zac Stewart (@zacstewart)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/sklearn-pandas-1.6.0/setup.cfg
new/sklearn-pandas-1.7.0/setup.cfg
--- old/sklearn-pandas-1.6.0/setup.cfg 2017-10-28 15:46:31.000000000 +0200
+++ new/sklearn-pandas-1.7.0/setup.cfg 2018-08-15 14:16:05.000000000 +0200
@@ -4,4 +4,5 @@
[egg_info]
tag_build =
tag_date = 0
+tag_svn_revision = 0
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/sklearn-pandas-1.6.0/sklearn_pandas/__init__.py
new/sklearn-pandas-1.7.0/sklearn_pandas/__init__.py
--- old/sklearn-pandas-1.6.0/sklearn_pandas/__init__.py 2017-10-28
15:42:48.000000000 +0200
+++ new/sklearn-pandas-1.7.0/sklearn_pandas/__init__.py 2018-08-15
14:15:41.000000000 +0200
@@ -1,4 +1,4 @@
-__version__ = '1.6.0'
+__version__ = '1.7.0'
from .dataframe_mapper import DataFrameMapper # NOQA
from .cross_validation import cross_val_score, GridSearchCV,
RandomizedSearchCV # NOQA
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/sklearn-pandas-1.6.0/sklearn_pandas/categorical_imputer.py
new/sklearn-pandas-1.7.0/sklearn_pandas/categorical_imputer.py
--- old/sklearn-pandas-1.6.0/sklearn_pandas/categorical_imputer.py
2017-04-29 18:57:39.000000000 +0200
+++ new/sklearn-pandas-1.7.0/sklearn_pandas/categorical_imputer.py
2018-08-05 17:20:13.000000000 +0200
@@ -33,6 +33,18 @@
copy : boolean, optional (default=True)
If True, a copy of X will be created.
+ strategy : string, optional (default = 'mode')
+ If set to 'mode', replace all instances of `missing_values`
+ with the modal value. Otherwise, replace with
+ the value specified via `replacement`.
+
+ replacement : string, optional (default=None)
+ The value that all instances of `missing_values` are replaced
+ with if `strategy` is not set to 'mode'. This is useful if
+ you don't want to impute with the mode, or if there are multiple
+ modes in your data and you want to choose a particular one. If
+ `strategy` is set to `mode`, this parameter is ignored.
+
Attributes
----------
fill_ : str
@@ -40,9 +52,29 @@
"""
- def __init__(self, missing_values='NaN', copy=True):
+ def __init__(
+ self,
+ missing_values='NaN',
+ strategy='mode',
+ replacement=None,
+ copy=True
+ ):
self.missing_values = missing_values
self.copy = copy
+ self.replacement = replacement
+ self.strategy = strategy
+
+ strategies = ['fixed_value', 'mode']
+ if self.strategy not in strategies:
+ raise ValueError(
+ 'Strategy {0} not in {1}'.format(self.strategy, strategies)
+ )
+
+ if self.strategy == 'fixed_value' and self.replacement is None:
+ raise ValueError(
+ 'Please specify a value for \'replacement\''
+ 'when using the fixed_value strategy.'
+ )
def fit(self, X, y=None):
"""
@@ -63,9 +95,13 @@
mask = _get_mask(X, self.missing_values)
X = X[~mask]
-
- modes = pd.Series(X).mode()
+ if self.strategy == 'mode':
+ modes = pd.Series(X).mode()
+ elif self.strategy == 'fixed_value':
+ modes = np.array([self.replacement])
if modes.shape[0] == 0:
+ raise ValueError('Data is empty or all values are null')
+ elif modes.shape[0] > 1:
raise ValueError('No value is repeated more than '
'once in the column')
else:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/sklearn-pandas-1.6.0/sklearn_pandas/dataframe_mapper.py
new/sklearn-pandas-1.7.0/sklearn_pandas/dataframe_mapper.py
--- old/sklearn-pandas-1.6.0/sklearn_pandas/dataframe_mapper.py 2017-10-22
19:44:45.000000000 +0200
+++ new/sklearn-pandas-1.7.0/sklearn_pandas/dataframe_mapper.py 2018-08-05
19:04:13.000000000 +0200
@@ -253,13 +253,26 @@
else:
names = _get_feature_names(transformer)
if names is not None and len(names) == num_cols:
- return [name + '_' + str(o) for o in names]
+ return ['%s_%s' % (name, o) for o in names]
# otherwise, return name concatenated with '_1', '_2', etc.
else:
return [name + '_' + str(o) for o in range(num_cols)]
else:
return [name]
+ def get_dtypes(self, extracted):
+ dtypes_features = [self.get_dtype(ex) for ex in extracted]
+ return [dtype for dtype_feature in dtypes_features
+ for dtype in dtype_feature]
+
+ def get_dtype(self, ex):
+ if isinstance(ex, np.ndarray) or sparse.issparse(ex):
+ return [ex.dtype] * ex.shape[1]
+ elif isinstance(ex, pd.DataFrame):
+ return list(ex.dtypes)
+ else:
+ raise TypeError(type(ex))
+
def transform(self, X):
"""
Transform the given data. Assumes that fit has already been called.
@@ -323,8 +336,15 @@
else:
index = None
- return pd.DataFrame(stacked,
- columns=self.transformed_names_,
- index=index)
+ # output different data types, if appropriate
+ dtypes = self.get_dtypes(extracted)
+ df_out = pd.DataFrame(
+ stacked,
+ columns=self.transformed_names_,
+ index=index)
+ # preserve types
+ for col, dtype in zip(self.transformed_names_, dtypes):
+ df_out[col] = df_out[col].astype(dtype)
+ return df_out
else:
return stacked
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore'
old/sklearn-pandas-1.6.0/sklearn_pandas.egg-info/PKG-INFO
new/sklearn-pandas-1.7.0/sklearn_pandas.egg-info/PKG-INFO
--- old/sklearn-pandas-1.6.0/sklearn_pandas.egg-info/PKG-INFO 2017-10-28
15:46:31.000000000 +0200
+++ new/sklearn-pandas-1.7.0/sklearn_pandas.egg-info/PKG-INFO 2018-08-15
14:15:57.000000000 +0200
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: sklearn-pandas
-Version: 1.6.0
+Version: 1.7.0
Summary: Pandas integration with sklearn
Home-page: https://github.com/paulgb/sklearn-pandas
Author: Israel Saeta Pérez