Hello community,

here is the log from the commit of package python-swifter for openSUSE:Factory checked in at 2020-02-11 22:25:50
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-swifter (Old)
 and      /work/SRC/openSUSE:Factory/.python-swifter.new.26092 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-swifter" Tue Feb 11 22:25:50 2020 rev:3 rq:773484 version:0.301 Changes: -------- --- /work/SRC/openSUSE:Factory/python-swifter/python-swifter.changes 2020-02-03 11:13:34.277852604 +0100 +++ /work/SRC/openSUSE:Factory/.python-swifter.new.26092/python-swifter.changes 2020-02-11 22:26:19.415597755 +0100 @@ -1,0 +2,6 @@ +Mon Feb 10 15:09:53 UTC 2020 - Todd R <[email protected]> + +- Update to 0.301 + * Following pandas release v1.0.0, removing deprecated keyword args "broadcast" and "reduce" + +------------------------------------------------------------------- Old: ---- swifter-0.300.tar.gz New: ---- swifter-0.301.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-swifter.spec ++++++ --- /var/tmp/diff_new_pack.F64bTR/_old 2020-02-11 22:26:19.871598005 +0100 +++ /var/tmp/diff_new_pack.F64bTR/_new 2020-02-11 22:26:19.875598008 +0100 @@ -19,7 +19,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} %define skip_python2 1 Name: python-swifter -Version: 0.300 +Version: 0.301 Release: 0 Summary: Tool to speed up pandas calculations License: MIT ++++++ swifter-0.300.tar.gz -> swifter-0.301.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/.circleci/config.yml new/swifter-0.301/.circleci/config.yml --- old/swifter-0.300/.circleci/config.yml 2020-01-16 00:03:39.000000000 +0100 +++ new/swifter-0.301/.circleci/config.yml 2020-02-05 07:01:02.000000000 +0100 @@ -2,7 +2,7 @@ orbs: codecov: codecov/[email protected] jobs: - unittest-lint: + unittest-lint-codecov: parallelism: 1 working_directory: ~/repo docker: @@ -34,4 +34,4 @@ version: 2 build-and-test: jobs: - - unittest-lint + - unittest-lint-codecov diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/docs/changelog.md new/swifter-0.301/docs/changelog.md --- old/swifter-0.300/docs/changelog.md 2020-01-16 00:03:39.000000000 +0100 +++ new/swifter-0.301/docs/changelog.md 2020-02-05 07:01:02.000000000 +0100 @@ -1,5 +1,8 @@ # Changelog +## Version 0.301 +Following pandas release v1.0.0, removing deprecated keyword args "broadcast" and "reduce" + ## Version 0.300 Added new `applymap` method for pandas dataframes. `df.swifter.applymap(...)` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/docs/documentation.md new/swifter-0.301/docs/documentation.md --- old/swifter-0.300/docs/documentation.md 2020-01-16 00:03:39.000000000 +0100 +++ new/swifter-0.301/docs/documentation.md 2020-02-05 07:01:02.000000000 +0100 @@ -27,6 +27,9 @@ `kwds` : Additional keyword arguments will be passed as keywords to the function +NOTE: docstring taken from pandas documentation. + + ## 2. `pandas.DataFrame.swifter.apply` Efficiently apply any function to a pandas dataframe in the fastest available manner. @@ -35,9 +38,7 @@ def pandas.DataFrame.swifter.apply( func, axis=0, - broadcast=None, raw=False, - reduce=None, result_type=None, args=(), **kwds @@ -53,21 +54,10 @@ * 0 or 'index': apply function to each column. * 1 or 'columns': apply function to each row. -`broadcast` : bool, optional. Only relevant for aggregation functions: - -False or None : returns a Series whose length is the length of the index or the number of columns (based on the axis parameter) -True : results will be broadcast to the original shape of the frame, the original index and columns will be retained. 
-Deprecated since version 0.23.0: This argument will be removed in a future version, replaced by result_type='broadcast'. - `raw` : bool, default False - False : passes each row or column as a Series to the function. True : the passed function will receive ndarray objects instead. If you are just applying a NumPy reduction function this will achieve much better performance. -`reduce` : bool or None, default None. Try to apply reduction procedures. If the DataFrame is empty, apply will use reduce to determine whether the result should be a Series or a DataFrame. If reduce=None (the default), apply's return value will be guessed by calling func on an empty Series (note: while guessing, exceptions raised by func will be ignored). If reduce=True a Series will always be returned, and if reduce=False a DataFrame will always be returned. - -Deprecated since pandas version 0.23.0: This argument will be removed in a future version, replaced by result_type='reduce'. - `result_type` : {'expand', 'reduce', 'broadcast', None}, default None. These only act when axis=1 (columns): 'expand' : list-like results will be turned into columns. @@ -75,12 +65,11 @@ 'broadcast' : results will be broadcast to the original shape of the DataFrame, the original index and columns will be retained. The default behaviour (None) depends on the return value of the applied function: list-like results will be returned as a Series of those. However if the apply function returns a Series these are expanded to columns. -New in pandas version 0.23.0. - `args` : tuple. Positional arguments to pass to func in addition to the array/series. `kwds` : Additional keyword arguments to pass as keywords arguments to func. +NOTE: docstring taken from pandas documentation. **returns:** diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/setup.py new/swifter-0.301/setup.py --- old/swifter-0.300/setup.py 2020-01-16 00:03:39.000000000 +0100 +++ new/swifter-0.301/setup.py 2020-02-05 07:01:02.000000000 +0100 @@ -3,12 +3,12 @@ setup( name="swifter", packages=["swifter"], # this must be the same as the name above - version="0.300", + version="0.301", description="A package which efficiently applies any function to a pandas dataframe or series in the fastest available manner", author="Jason Carpenter", author_email="[email protected]", url="https://github.com/jmcarpenter2/swifter", # use the URL to the github repo - download_url="https://github.com/jmcarpenter2/swifter/archive/0.300.tar.gz", + download_url="https://github.com/jmcarpenter2/swifter/archive/0.301.tar.gz", keywords=["pandas", "dask", "apply", "function", "parallelize", "vectorize"], install_requires=["pandas>=0.23.0", "psutil", "dask[complete]>=0.19.0", "tqdm>=4.33.0", "ipywidgets>=7.0.0", "parso>0.4.0", "numba"], classifiers=[], diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/swifter/__init__.py new/swifter-0.301/swifter/__init__.py --- old/swifter-0.300/swifter/__init__.py 2020-01-16 00:03:39.000000000 +0100 +++ new/swifter-0.301/swifter/__init__.py 2020-02-05 07:01:02.000000000 +0100 @@ -3,4 +3,4 @@ from .swifter import SeriesAccessor, DataFrameAccessor __all__ = ["SeriesAccessor, DataFrameAccessor"] -__version__ = "0.300" +__version__ = "0.301" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/swifter/swifter.py new/swifter-0.301/swifter/swifter.py --- old/swifter-0.300/swifter/swifter.py 2020-01-16 
00:03:39.000000000 +0100 +++ new/swifter-0.301/swifter/swifter.py 2020-02-05 07:01:02.000000000 +0100 @@ -118,6 +118,7 @@ self._scheduler, self._progress_bar, self._progress_bar_desc, + self._allow_dask_on_strings, **kwds ) @@ -263,33 +264,20 @@ @pd.api.extensions.register_dataframe_accessor("swifter") class DataFrameAccessor(_SwifterObject): - def _wrapped_apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, args=(), **kwds): + def _wrapped_apply(self, func, axis=0, raw=None, result_type=None, args=(), **kwds): def wrapped(): with suppress_stdout_stderr(): self._obj.iloc[: self._SAMPLE_SIZE, :].apply( - func, - axis=axis, - broadcast=broadcast, - raw=raw, - reduce=reduce, - result_type=result_type, - args=args, - **kwds + func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds ) return wrapped - def _dask_apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, *args, **kwds): + def _dask_apply(self, func, axis=0, raw=None, result_type=None, *args, **kwds): sample = self._obj.iloc[: self._npartitions * 2, :] with suppress_stdout_stderr(): - meta = sample.apply( - func, axis=axis, broadcast=broadcast, raw=raw, reduce=reduce, result_type=result_type, args=args, **kwds - ) + meta = sample.apply(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds) try: - if broadcast: - result_type = "broadcast" - elif reduce: - result_type = "reduce" with suppress_stdout_stderr(): # check that the dask apply matches the pandas apply tmp_df = ( @@ -321,20 +309,16 @@ else: apply_func = self._obj.apply - return apply_func( - func, axis=axis, broadcast=broadcast, raw=raw, reduce=reduce, result_type=result_type, args=args, **kwds - ) + return apply_func(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds) - def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, args=(), **kwds): + def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds): """ Apply the function to the DataFrame using swifter """ # If there are no rows return early using Pandas if not self._nrows: - return self._obj.apply( - func, axis=axis, broadcast=broadcast, raw=raw, reduce=reduce, result_type=result_type, args=args, **kwds - ) + return self._obj.apply(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds) sample = self._obj.iloc[: self._npartitions * 2, :] # check if input is string or if the user is overriding the string processing default @@ -344,16 +328,7 @@ with suppress_stdout_stderr(): tmp_df = func(sample, *args, **kwds) self._validate_apply( - sample.apply( - func, - axis=axis, - broadcast=broadcast, - raw=raw, - reduce=reduce, - result_type=result_type, - args=args, - **kwds - ).equals(tmp_df), + sample.apply(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds).equals(tmp_df), error_message="Vectorized function sample does not match pandas apply sample.", ) return func(self._obj, *args, **kwds) @@ -364,9 +339,7 @@ TypingError, KeyError, ): # if can't vectorize, estimate time to pandas apply - wrapped = self._wrapped_apply( - func, axis=axis, broadcast=broadcast, raw=raw, reduce=reduce, result_type=result_type, args=args, **kwds - ) + wrapped = self._wrapped_apply(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds) timed = timeit.timeit(wrapped, number=N_REPEATS) sample_proc_est = timed / N_REPEATS est_apply_duration = sample_proc_est / self._SAMPLE_SIZE * self._obj.shape[0] @@ -379,7 +352,7 @@ "Dask currently does not have an 
axis=0 apply implemented.\n" "More details at https://github.com/jmcarpenter2/swifter/issues/10" ) - return self._dask_apply(func, axis, broadcast, raw, reduce, result_type, *args, **kwds) + return self._dask_apply(func, axis, raw, result_type, *args, **kwds) else: # use pandas if self._progress_bar: tqdm.pandas(desc=self._progress_bar_desc or "Pandas Apply") @@ -387,16 +360,7 @@ else: apply_func = self._obj.apply - return apply_func( - func, - axis=axis, - broadcast=broadcast, - raw=raw, - reduce=reduce, - result_type=result_type, - args=args, - **kwds - ) + return apply_func(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds) def _wrapped_applymap(self, func): def wrapped(): @@ -492,7 +456,7 @@ class Transformation(_SwifterObject): def __init__( self, - obj, + pandas_obj, npartitions=None, dask_threshold=1, scheduler="processes", @@ -500,13 +464,13 @@ progress_bar_desc=None, allow_dask_on_strings=False, ): - super().__init__( - obj, npartitions, dask_threshold, scheduler, progress_bar, progress_bar_desc, allow_dask_on_strings + super(Transformation, self).__init__( + pandas_obj, npartitions, dask_threshold, scheduler, progress_bar, progress_bar_desc, allow_dask_on_strings ) - self._sample_pd = obj.iloc[: self._SAMPLE_SIZE] - self._obj_pd = obj - self._obj_dd = dd.from_pandas(obj, npartitions=npartitions) - self._nrows = obj.shape[0] + self._sample_pd = pandas_obj.iloc[: self._SAMPLE_SIZE] + self._obj_pd = pandas_obj + self._obj_dd = dd.from_pandas(pandas_obj, npartitions=npartitions) + self._nrows = pandas_obj.shape[0] def _wrapped_apply(self, func, *args, **kwds): def wrapped(): @@ -536,7 +500,7 @@ sample_proc_est = timed / N_REPEATS est_apply_duration = sample_proc_est / self._SAMPLE_SIZE * self._nrows - # No `allow_dask_processing` variable here, because we don't know the dtypes of the resampler object + # No `allow_dask_processing` variable here, because we don't know the dtypes of the transformation if est_apply_duration > self._dask_threshold: return self._dask_apply(func, *args, **kwds) else: # use pandas @@ -550,7 +514,7 @@ class Rolling(Transformation): def __init__( self, - obj, + pandas_obj, npartitions=None, dask_threshold=1, scheduler="processes", @@ -560,7 +524,7 @@ **kwds ): super(Rolling, self).__init__( - obj, npartitions, dask_threshold, scheduler, progress_bar, progress_bar_desc, allow_dask_on_strings + pandas_obj, npartitions, dask_threshold, scheduler, progress_bar, progress_bar_desc, allow_dask_on_strings ) self._rolling_kwds = kwds.copy() self._sample_original = self._sample_pd.copy() @@ -598,7 +562,7 @@ class Resampler(Transformation): def __init__( self, - obj, + pandas_obj, npartitions=None, dask_threshold=1, scheduler="processes", @@ -608,7 +572,7 @@ **kwds ): super(Resampler, self).__init__( - obj, npartitions, dask_threshold, scheduler, progress_bar, progress_bar_desc, allow_dask_on_strings + pandas_obj, npartitions, dask_threshold, scheduler, progress_bar, progress_bar_desc, allow_dask_on_strings ) self._resampler_kwds = kwds.copy() self._sample_original = self._sample_pd.copy() @@ -622,13 +586,6 @@ else None ) - def _wrapped_apply(self, func, *args, **kwds): - def wrapped(): - with suppress_stdout_stderr(): - self._sample_pd.apply(func, *args, **kwds) - - return wrapped - def _dask_apply(self, func, *args, **kwds): try: # check that the dask resampler apply matches the pandas apply @@ -652,23 +609,3 @@ except (AttributeError, ValueError, TypeError, KeyError): # use pandas -- no progress_apply available for resampler objects return 
self._obj_pd.apply(func, *args, **kwds) - - def apply(self, func, *args, **kwds): - """ - Apply the function to the resampler swifter object - """ - # if the resampled dataframe is empty, return early using Pandas - if not self._nrows: - return self._obj_pd.apply(func, args=args, **kwds) - - # estimate time to pandas apply - wrapped = self._wrapped_apply(func, *args, **kwds) - timed = timeit.timeit(wrapped, number=N_REPEATS) - sample_proc_est = timed / N_REPEATS - est_apply_duration = sample_proc_est / self._SAMPLE_SIZE * self._nrows - - # No `allow_dask_processing` variable here, because we don't know the dtypes of the resampler object - if est_apply_duration > self._dask_threshold: - return self._dask_apply(func, *args, **kwds) - else: # use pandas -- no progress_apply available for resampler objects - return self._obj_pd.apply(func, *args, **kwds) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/swifter-0.300/swifter/swifter_tests.py new/swifter-0.301/swifter/swifter_tests.py --- old/swifter-0.300/swifter/swifter_tests.py 2020-01-16 00:03:39.000000000 +0100 +++ new/swifter-0.301/swifter/swifter_tests.py 2020-02-05 07:01:02.000000000 +0100 @@ -151,7 +151,7 @@ sys.executable, "-c", "import pandas as pd; import numpy as np; import swifter; " - + "df = pd.DataFrame({'x': np.random.normal(size=4)}); " + + "df = pd.DataFrame({'x': np.random.normal(size=4)}, dtype='float32'); " + "df.swifter.progress_bar(enable=False).apply(lambda x: print(x.values))", ], stderr=subprocess.STDOUT, @@ -178,12 +178,12 @@ def test_rolling_apply_on_empty_dataframe(self): df = pd.DataFrame(columns=["x", "y"]) - pd_val = df.rolling(1).apply(math_agg_foo) - swifter_val = df.swifter.rolling(1).apply(math_agg_foo) + pd_val = df.rolling(1).apply(math_agg_foo, raw=True) + swifter_val = df.swifter.rolling(1).apply(math_agg_foo, raw=True) self.assertEqual(pd_val, swifter_val) def test_resample_apply_on_empty_dataframe(self): - df = pd.DataFrame(columns=["x", "y"], index=pd.DatetimeIndex(freq="3d", periods=0, start="2020/01/01")) + df = pd.DataFrame(columns=["x", "y"], index=pd.date_range(start="2020/01/01", periods=0)) pd_val = df.resample("1d").apply(math_agg_foo) swifter_val = df.swifter.resample("1d").apply(math_agg_foo) self.assertEqual(pd_val, swifter_val) @@ -268,13 +268,13 @@ df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)}) start_pd = time.time() - pd_val = df.apply(math_agg_foo, axis=1, broadcast=True) + pd_val = df.apply(math_agg_foo, axis=1, result_type="broadcast") end_pd = time.time() pd_time = end_pd - start_pd start_swifter = time.time() swifter_val = df.swifter.progress_bar(desc="Nonvec math apply + broadcast ~ DF").apply( - math_agg_foo, axis=1, broadcast=True + math_agg_foo, axis=1, result_type="broadcast" ) end_swifter = time.time() swifter_time = end_swifter - start_swifter @@ -286,13 +286,13 @@ df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)}) start_pd = time.time() - pd_val = df.apply(math_agg_foo, axis=1, reduce=True) + pd_val = df.apply(math_agg_foo, axis=1, result_type="reduce") end_pd = time.time() pd_time = end_pd - start_pd start_swifter = time.time() swifter_val = df.swifter.progress_bar(desc="Nonvec math apply + reduce ~ DF").apply( - math_agg_foo, axis=1, reduce=True + math_agg_foo, axis=1, result_type="reduce" ) end_swifter = time.time() swifter_time = end_swifter - start_swifter @@ -320,14 +320,16 @@ def 
test_nonvectorized_math_apply_on_small_rolling_dataframe(self): df = pd.DataFrame({"x": np.arange(0, 1000)}, index=pd.date_range("2019-01-1", "2020-01-1", periods=1000)) - pd_val = df.rolling("3T").apply(math_agg_foo) - swifter_val = df.swifter.rolling("3T").progress_bar(desc="Nonvec math apply ~ Rolling DF").apply(math_agg_foo) + pd_val = df.rolling("3T").apply(math_agg_foo, raw=True) + swifter_val = ( + df.swifter.rolling("3T").progress_bar(desc="Nonvec math apply ~ Rolling DF").apply(math_agg_foo, raw=True) + ) self.assertEqual(pd_val, swifter_val) def test_nonvectorized_math_apply_on_small_rolling_dataframe_no_progress_bar(self): df = pd.DataFrame({"x": np.arange(0, 1000)}, index=pd.date_range("2019-01-1", "2020-01-1", periods=1000)) - pd_val = df.rolling("3T").apply(math_agg_foo) - swifter_val = df.swifter.rolling("3T").progress_bar(enable=False).apply(math_agg_foo) + pd_val = df.rolling("3T").apply(math_agg_foo, raw=True) + swifter_val = df.swifter.rolling("3T").progress_bar(enable=False).apply(math_agg_foo, raw=True) self.assertEqual(pd_val, swifter_val) def test_vectorized_math_apply_on_large_rolling_dataframe(self): @@ -336,12 +338,12 @@ ) start_pd = time.time() - pd_val = df.rolling("3T").apply(sum) + pd_val = df.rolling("3T").apply(max, raw=True) end_pd = time.time() pd_time = end_pd - start_pd start_swifter = time.time() - swifter_val = df.swifter.rolling("3T").progress_bar(desc="Vec math apply ~ Rolling DF").apply(sum) + swifter_val = df.swifter.rolling("3T").progress_bar(desc="Vec math apply ~ Rolling DF").apply(max, raw=True) end_swifter = time.time() swifter_time = end_swifter - start_swifter @@ -350,16 +352,18 @@ def test_nonvectorized_math_apply_on_large_rolling_dataframe(self): df = pd.DataFrame( - {"x": np.arange(0, 2_000_000)}, index=pd.date_range("2019-01-1", "2020-01-1", periods=2_000_000) + {"x": np.arange(0, 1_000_000)}, index=pd.date_range("2019-01-1", "2020-01-1", periods=1_000_000) ) start_pd = time.time() - pd_val = df.rolling("3T").apply(math_agg_foo) + pd_val = df.rolling("3T").apply(math_agg_foo, raw=True) end_pd = time.time() pd_time = end_pd - start_pd start_swifter = time.time() - swifter_val = df.swifter.rolling("3T").progress_bar(desc="Nonvec math apply ~ Rolling DF").apply(math_agg_foo) + swifter_val = ( + df.swifter.rolling("3T").progress_bar(desc="Nonvec math apply ~ Rolling DF").apply(math_agg_foo, raw=True) + ) end_swifter = time.time() swifter_time = end_swifter - start_swifter @@ -407,7 +411,7 @@ self.assertLess(swifter_time, pd_time) def test_nonvectorized_math_applymap_on_large_dataframe(self): - df = pd.DataFrame({"x": np.random.normal(size=2_000_000), "y": np.random.uniform(size=2_000_000)}) + df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)}) start_pd = time.time() pd_val = df.applymap(math_foo)
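
For reference, the user-facing effect of this update is the same one exercised by the test changes above: with pandas >= 1.0.0 the deprecated `broadcast` and `reduce` keywords are rejected, and callers pass `result_type` instead. The following is a minimal sketch of that migration; `row_sum` is a hypothetical stand-in for any row-wise aggregation function, not part of the package.

```python
# Minimal sketch of the 0.300 -> 0.301 call-site migration.
# `row_sum` is a hypothetical aggregation used only for illustration.
import numpy as np
import pandas as pd
import swifter  # noqa: F401  -- importing swifter registers the .swifter accessor


def row_sum(row):
    return row.sum()


df = pd.DataFrame({"x": np.random.normal(size=1000),
                   "y": np.random.uniform(size=1000)})

# swifter 0.300 and earlier (rejected by pandas >= 1.0.0):
#   df.swifter.apply(row_sum, axis=1, broadcast=True)
#   df.swifter.apply(row_sum, axis=1, reduce=True)

# swifter 0.301 with pandas >= 1.0.0:
broadcast_result = df.swifter.apply(row_sum, axis=1, result_type="broadcast")
reduce_result = df.swifter.apply(row_sum, axis=1, result_type="reduce")
```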
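
The rolling tests were likewise adjusted to pass `raw=True` explicitly, so each window is handed to the function as an ndarray. A small sketch mirroring those updated tests follows; `math_agg_foo` is the same kind of placeholder aggregation the test suite uses.

```python
# Sketch mirroring the updated rolling tests (raw=True passed explicitly).
import numpy as np
import pandas as pd
import swifter  # noqa: F401  -- registers the .swifter accessor


def math_agg_foo(window):
    # With raw=True the window arrives as a NumPy ndarray.
    return window.sum()


df = pd.DataFrame({"x": np.arange(0, 1000)},
                  index=pd.date_range("2019-01-1", "2020-01-1", periods=1000))

pd_val = df.rolling("3T").apply(math_agg_foo, raw=True)
swifter_val = (
    df.swifter.rolling("3T").progress_bar(enable=False).apply(math_agg_foo, raw=True)
)
```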
