Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-pandas for openSUSE:Factory checked in at 2022-01-07 12:44:42 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-pandas (Old) and /work/SRC/openSUSE:Factory/.python-pandas.new.1896 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-pandas" Fri Jan 7 12:44:42 2022 rev:39 rq:943930 version:1.3.5 Changes: -------- --- /work/SRC/openSUSE:Factory/python-pandas/python-pandas.changes 2021-10-25 15:17:44.869687428 +0200 +++ /work/SRC/openSUSE:Factory/.python-pandas.new.1896/python-pandas.changes 2022-01-07 12:45:13.963797606 +0100 @@ -1,0 +2,25 @@ +Tue Jan 4 21:56:55 UTC 2022 - Ben Greiner <c...@bnavigator.de> + +- Update to version 1.3.5 + * Fixed regression in Series.equals() when comparing floats with + dtype object to None (GH44190) + * Fixed regression in merge_asof() raising error when array was + supplied as join key (GH42844) + * Fixed regression when resampling DataFrame with DateTimeIndex + with empty groups and uint8, uint16 or uint32 columns + incorrectly raising RuntimeError (GH43329) + * Fixed regression in creating a DataFrame from a timezone-aware + Timestamp scalar near a Daylight Savings Time transition + (GH42505) + * Fixed performance regression in read_csv() (GH44106) + * Fixed regression in Series.duplicated() and + Series.drop_duplicates() when Series has Categorical dtype with + boolean categories (GH44351) + * Fixed regression in GroupBy.sum() with timedelta64[ns] dtype + containing NaT failing to treat that value as NA (GH42659) + * Fixed regression in RollingGroupby.cov() and + RollingGroupby.corr() when other had the same shape as each + group would incorrectly return superfluous groups in the result + (GH42915) + +------------------------------------------------------------------- Old: ---- pandas-1.3.4.tar.gz New: ---- pandas-1.3.5.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-pandas.spec ++++++ --- /var/tmp/diff_new_pack.6VeBgd/_old 2022-01-07 12:45:14.783798175 +0100 +++ /var/tmp/diff_new_pack.6VeBgd/_new 2022-01-07 12:45:14.787798178 +0100 @@ -1,7 +1,7 @@ # # spec file # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2022 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -28,7 +28,7 @@ %bcond_with test %endif Name: python-pandas%{psuffix} -Version: 1.3.4 +Version: 1.3.5 Release: 0 Summary: Python data structures for data analysis, time series, and statistics License: BSD-3-Clause @@ -88,12 +88,14 @@ BuildRequires: %{python_module beautifulsoup4 >= 4.6.0} BuildRequires: %{python_module hypothesis} BuildRequires: %{python_module lxml >= 4.3.0} +BuildRequires: %{python_module matplotlib} BuildRequires: %{python_module numexpr >= 2.7.0} BuildRequires: %{python_module openpyxl >= 3.0.0} BuildRequires: %{python_module pandas = %{version}} BuildRequires: %{python_module pytest >= 6.0} BuildRequires: %{python_module pytest-mock} BuildRequires: %{python_module pytest-xdist} +BuildRequires: %{python_module scipy} BuildRequires: %{python_module xlrd >= 1.2.0} BuildRequires: xclip BuildRequires: xvfb-run ++++++ pandas-1.3.4.tar.gz -> pandas-1.3.5.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/PKG-INFO new/pandas-1.3.5/PKG-INFO --- old/pandas-1.3.4/PKG-INFO 2021-10-17 14:40:38.766444700 +0200 +++ new/pandas-1.3.5/PKG-INFO 2021-12-12 11:23:38.180560000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: pandas -Version: 1.3.4 +Version: 1.3.5 Summary: Powerful data structures for data analysis, time series, and statistics Home-page: https://pandas.pydata.org Author: The Pandas Development Team diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/doc/source/reference/io.rst new/pandas-1.3.5/doc/source/reference/io.rst --- old/pandas-1.3.4/doc/source/reference/io.rst 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/doc/source/reference/io.rst 2021-12-12 11:15:39.000000000 +0100 @@ -57,7 +57,7 @@ ExcelWriter -.. currentmodule:: pandas.io.json +.. currentmodule:: pandas JSON ~~~~ @@ -65,7 +65,10 @@ :toctree: api/ read_json - to_json + json_normalize + DataFrame.to_json + +.. currentmodule:: pandas.io.json .. autosummary:: :toctree: api/ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/doc/source/user_guide/io.rst new/pandas-1.3.5/doc/source/user_guide/io.rst --- old/pandas-1.3.4/doc/source/user_guide/io.rst 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/doc/source/user_guide/io.rst 2021-12-12 11:20:47.000000000 +0100 @@ -2977,6 +2977,7 @@ Even read XML from AWS S3 buckets such as Python Software Foundation's IRS 990 Form: .. ipython:: python + :okwarning: df = pd.read_xml( "s3://irs-form-990/201923199349319487_public.xml", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/doc/source/whatsnew/index.rst new/pandas-1.3.5/doc/source/whatsnew/index.rst --- old/pandas-1.3.4/doc/source/whatsnew/index.rst 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/doc/source/whatsnew/index.rst 2021-12-12 11:20:47.000000000 +0100 @@ -16,6 +16,7 @@ .. toctree:: :maxdepth: 2 + v1.3.5 v1.3.4 v1.3.3 v1.3.2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/doc/source/whatsnew/v1.3.4.rst new/pandas-1.3.5/doc/source/whatsnew/v1.3.4.rst --- old/pandas-1.3.4/doc/source/whatsnew/v1.3.4.rst 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/doc/source/whatsnew/v1.3.4.rst 2021-12-12 11:15:39.000000000 +0100 @@ -54,4 +54,4 @@ Contributors ~~~~~~~~~~~~ -.. contributors:: v1.3.3..v1.3.4|HEAD +.. contributors:: v1.3.3..v1.3.4 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/doc/source/whatsnew/v1.3.5.rst new/pandas-1.3.5/doc/source/whatsnew/v1.3.5.rst --- old/pandas-1.3.4/doc/source/whatsnew/v1.3.5.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/pandas-1.3.5/doc/source/whatsnew/v1.3.5.rst 2021-12-12 11:15:39.000000000 +0100 @@ -0,0 +1,33 @@ +.. _whatsnew_135: + +What's new in 1.3.5 (December 12, 2021) +--------------------------------------- + +These are the changes in pandas 1.3.5. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_135.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`) +- Fixed regression in :func:`merge_asof` raising error when array was supplied as join key (:issue:`42844`) +- Fixed regression when resampling :class:`DataFrame` with :class:`DateTimeIndex` with empty groups and ``uint8``, ``uint16`` or ``uint32`` columns incorrectly raising ``RuntimeError`` (:issue:`43329`) +- Fixed regression in creating a :class:`DataFrame` from a timezone-aware :class:`Timestamp` scalar near a Daylight Savings Time transition (:issue:`42505`) +- Fixed performance regression in :func:`read_csv` (:issue:`44106`) +- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) +- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) +- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_135.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.3.4..v1.3.5|HEAD diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/_libs/groupby.pyi new/pandas-1.3.5/pandas/_libs/groupby.pyi --- old/pandas-1.3.4/pandas/_libs/groupby.pyi 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/_libs/groupby.pyi 2021-12-12 11:20:47.000000000 +0100 @@ -53,6 +53,7 @@ values: np.ndarray, # ndarray[complexfloating_t, ndim=2] labels: np.ndarray, # const intp_t[:] min_count: int = ..., + datetimelike: bool = ..., ) -> None: ... def group_prod( out: np.ndarray, # floating[:, ::1] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/_libs/groupby.pyx new/pandas-1.3.5/pandas/_libs/groupby.pyx --- old/pandas-1.3.4/pandas/_libs/groupby.pyx 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/_libs/groupby.pyx 2021-12-12 11:20:47.000000000 +0100 @@ -483,7 +483,8 @@ int64_t[::1] counts, ndarray[add_t, ndim=2] values, const intp_t[:] labels, - Py_ssize_t min_count=0) -> None: + Py_ssize_t min_count=0, + bint datetimelike=False) -> None: """ Only aggregates on axis=0 using Kahan summation """ @@ -545,7 +546,14 @@ val = values[i, j] # not nan - if val == val: + # With dt64/td64 values, values have been cast to float64 + # instead if int64 for group_add, but the logic + # is otherwise the same as in _treat_as_na + if val == val and not ( + add_t is float64_t + and datetimelike + and val == <float64_t>NPY_NAT + ): nobs[lab, j] += 1 y = val - compensation[lab, j] t = sumx[lab, j] + y diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/_libs/missing.pyx new/pandas-1.3.5/pandas/_libs/missing.pyx --- old/pandas-1.3.4/pandas/_libs/missing.pyx 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/_libs/missing.pyx 2021-12-12 11:20:47.000000000 +0100 @@ -64,7 +64,7 @@ elif left is NaT: return right is NaT elif util.is_float_object(left): - if nan_matches_none and right is None: + if nan_matches_none and right is None and util.is_nan(left): return True return ( util.is_nan(left) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/_libs/ops_dispatch.pyx new/pandas-1.3.5/pandas/_libs/ops_dispatch.pyx --- old/pandas-1.3.4/pandas/_libs/ops_dispatch.pyx 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/_libs/ops_dispatch.pyx 2021-12-12 11:20:47.000000000 +0100 @@ -26,7 +26,7 @@ "true_divide": "truediv", "power": "pow", "remainder": "mod", - "divide": "div", + "divide": "truediv", "equal": "eq", "not_equal": "ne", "less": "lt", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/_version.py new/pandas-1.3.5/pandas/_version.py --- old/pandas-1.3.4/pandas/_version.py 2021-10-17 14:40:38.766444700 +0200 +++ new/pandas-1.3.5/pandas/_version.py 2021-12-12 11:23:38.190560000 +0100 @@ -8,11 +8,11 @@ version_json = ''' { - "date": "2021-10-17T13:27:58+0100", + "date": "2021-12-12T10:20:48+0000", "dirty": false, "error": null, - "full-revisionid": "945c9ed766a61c7d2c0a7cbb251b6edebf9cb7d5", - "version": "1.3.4" + "full-revisionid": "66e3805b8cabe977f40c05259cc3fcf7ead5687d", + "version": "1.3.5" } ''' # END VERSION_JSON diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/compat/numpy/__init__.py new/pandas-1.3.5/pandas/compat/numpy/__init__.py --- old/pandas-1.3.4/pandas/compat/numpy/__init__.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/compat/numpy/__init__.py 2021-12-12 11:20:47.000000000 +0100 @@ -12,9 +12,15 @@ np_version_under1p18 = _nlv < Version("1.18") np_version_under1p19 = _nlv < Version("1.19") np_version_under1p20 = _nlv < Version("1.20") +np_version_under1p22 = _nlv < Version("1.22") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.17.3" +if is_numpy_dev or not np_version_under1p22: + np_percentile_argname = "method" +else: + np_percentile_argname = "interpolation" + if _nlv < Version(_min_numpy_ver): raise ImportError( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/algorithms.py new/pandas-1.3.5/pandas/core/algorithms.py --- old/pandas-1.3.4/pandas/core/algorithms.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/algorithms.py 2021-12-12 11:20:47.000000000 +0100 @@ -139,7 +139,7 @@ # i.e. all-bool Categorical, BooleanArray try: return np.asarray(values).astype("uint8", copy=False), values.dtype - except TypeError: + except (TypeError, ValueError): # GH#42107 we have pd.NAs present return np.asarray(values), values.dtype diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/arrays/datetimes.py new/pandas-1.3.5/pandas/core/arrays/datetimes.py --- old/pandas-1.3.4/pandas/core/arrays/datetimes.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/arrays/datetimes.py 2021-12-12 11:20:47.000000000 +0100 @@ -2067,8 +2067,13 @@ ) if tz and inferred_tz: # two timezones: convert to intended from base UTC repr - data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) - data = data.view(DT64NS_DTYPE) + if data.dtype == "i8": + # GH#42505 + # by convention, these are _already_ UTC, e.g + return data.view(DT64NS_DTYPE), tz, None + + utc_vals = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + data = utc_vals.view(DT64NS_DTYPE) elif inferred_tz: tz = inferred_tz elif allow_object and data.dtype == object: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/arrays/sparse/scipy_sparse.py new/pandas-1.3.5/pandas/core/arrays/sparse/scipy_sparse.py --- old/pandas-1.3.4/pandas/core/arrays/sparse/scipy_sparse.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/arrays/sparse/scipy_sparse.py 2021-12-12 11:20:47.000000000 +0100 @@ -122,7 +122,7 @@ Parameters ---------- - A : scipy.sparse.coo.coo_matrix + A : scipy.sparse.coo_matrix dense_index : bool, default False Returns diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/dtypes/cast.py new/pandas-1.3.5/pandas/core/dtypes/cast.py --- old/pandas-1.3.4/pandas/core/dtypes/cast.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/dtypes/cast.py 2021-12-12 11:20:47.000000000 +0100 @@ -908,9 +908,9 @@ """ new_dtype, fill_value = maybe_promote(values.dtype, fill_value) # We get a copy in all cases _except_ (values.dtype == new_dtype and not copy) - values = values.astype(new_dtype, copy=copy) + upcast_values = values.astype(new_dtype, copy=copy) - return values, fill_value + return upcast_values, fill_value # type: ignore[return-value] def invalidate_string_dtypes(dtype_set: set[DtypeObj]): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/groupby/ops.py new/pandas-1.3.5/pandas/core/groupby/ops.py --- old/pandas-1.3.4/pandas/core/groupby/ops.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/groupby/ops.py 2021-12-12 11:20:47.000000000 +0100 @@ -546,9 +546,10 @@ elif is_bool_dtype(dtype): values = values.astype("int64") elif is_integer_dtype(dtype): - # e.g. uint8 -> uint64, int16 -> int64 - dtype_str = dtype.kind + "8" - values = values.astype(dtype_str, copy=False) + # GH#43329 If the dtype is explicitly of type uint64 the type is not + # changed to prevent overflow. + if dtype != np.uint64: + values = values.astype(np.int64, copy=False) elif is_numeric: if not is_complex_dtype(dtype): values = ensure_float64(values) @@ -574,6 +575,16 @@ min_count, is_datetimelike=is_datetimelike, ) + elif self.how in ["add"]: + # We support datetimelike + func( + result, + counts, + values, + comp_ids, + min_count, + datetimelike=is_datetimelike, + ) else: func(result, counts, values, comp_ids, min_count) else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/nanops.py new/pandas-1.3.5/pandas/core/nanops.py --- old/pandas-1.3.4/pandas/core/nanops.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/nanops.py 2021-12-12 11:20:47.000000000 +0100 @@ -29,6 +29,7 @@ Shape, ) from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import np_percentile_argname from pandas.core.dtypes.common import ( get_dtype, @@ -1667,7 +1668,7 @@ if len(values) == 0: return np.array([na_value] * len(q), dtype=values.dtype) - return np.percentile(values, q, interpolation=interpolation) + return np.percentile(values, q, **{np_percentile_argname: interpolation}) def nanpercentile( @@ -1720,7 +1721,9 @@ result = np.array(result, dtype=values.dtype, copy=False).T return result else: - return np.percentile(values, q, axis=1, interpolation=interpolation) + return np.percentile( + values, q, axis=1, **{np_percentile_argname: interpolation} + ) def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/reshape/merge.py new/pandas-1.3.5/pandas/core/reshape/merge.py --- old/pandas-1.3.4/pandas/core/reshape/merge.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/reshape/merge.py 2021-12-12 11:20:47.000000000 +0100 @@ -1781,21 +1781,27 @@ # GH#29130 Check that merge keys do not have dtype object if not self.left_index: left_on = self.left_on[0] - lo_dtype = ( - self.left[left_on].dtype - if left_on in self.left.columns - else self.left.index.get_level_values(left_on) - ) + if is_array_like(left_on): + lo_dtype = left_on.dtype + else: + lo_dtype = ( + self.left[left_on].dtype + if left_on in self.left.columns + else self.left.index.get_level_values(left_on) + ) else: lo_dtype = self.left.index.dtype if not self.right_index: right_on = self.right_on[0] - ro_dtype = ( - self.right[right_on].dtype - if right_on in self.right.columns - else self.right.index.get_level_values(right_on) - ) + if is_array_like(right_on): + ro_dtype = right_on.dtype + else: + ro_dtype = ( + self.right[right_on].dtype + if right_on in self.right.columns + else self.right.index.get_level_values(right_on) + ) else: ro_dtype = self.right.index.dtype diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/core/window/rolling.py new/pandas-1.3.5/pandas/core/window/rolling.py --- old/pandas-1.3.4/pandas/core/window/rolling.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/core/window/rolling.py 2021-12-12 11:20:47.000000000 +0100 @@ -658,8 +658,11 @@ target = self._create_data(target) result = super()._apply_pairwise(target, other, pairwise, func) # 1) Determine the levels + codes of the groupby levels - if other is not None: - # When we have other, we must reindex (expand) the result + if other is not None and not all( + len(group) == len(other) for group in self._grouper.indices.values() + ): + # GH 42915 + # len(other) != len(any group), so must reindex (expand) the result # from flex_binary_moment to a "transform"-like result # per groupby combination old_result_len = len(result) @@ -681,10 +684,9 @@ codes, levels = factorize(labels) groupby_codes.append(codes) groupby_levels.append(levels) - else: - # When we evaluate the pairwise=True result, repeat the groupby - # labels by the number of columns in the original object + # pairwise=True or len(other) == len(each group), so repeat + # the groupby labels by the number of columns in the original object groupby_codes = self._grouper.codes # error: Incompatible types in assignment (expression has type # "List[Index]", variable has type "List[Union[ndarray, Index]]") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/io/parsers/c_parser_wrapper.py new/pandas-1.3.5/pandas/io/parsers/c_parser_wrapper.py --- old/pandas-1.3.4/pandas/io/parsers/c_parser_wrapper.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/io/parsers/c_parser_wrapper.py 2021-12-12 11:20:47.000000000 +0100 @@ -206,9 +206,10 @@ """ assert self.orig_names is not None # error: Cannot determine type of 'names' - col_indices = [ - self.orig_names.index(x) for x in self.names # type: ignore[has-type] - ] + + # much faster than using orig_names.index(x) xref GH#44106 + names_dict = {x: i for i, x in enumerate(self.orig_names)} + col_indices = [names_dict[x] for x in self.names] # type: ignore[has-type] # error: Cannot determine type of 'names' noconvert_columns = self._set_noconvert_dtype_columns( col_indices, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/plotting/_matplotlib/compat.py new/pandas-1.3.5/pandas/plotting/_matplotlib/compat.py --- old/pandas-1.3.4/pandas/plotting/_matplotlib/compat.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/plotting/_matplotlib/compat.py 2021-12-12 11:15:39.000000000 +0100 @@ -24,3 +24,4 @@ mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge) mpl_ge_3_4_0 = _mpl_version("3.4.0", operator.ge) +mpl_ge_3_5_0 = _mpl_version("3.5.0", operator.ge) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/plotting/_matplotlib/converter.py new/pandas-1.3.5/pandas/plotting/_matplotlib/converter.py --- old/pandas-1.3.4/pandas/plotting/_matplotlib/converter.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/plotting/_matplotlib/converter.py 2021-12-12 11:20:47.000000000 +0100 @@ -353,8 +353,8 @@ locator = MilliSecondLocator(self.tz) locator.set_axis(self.axis) - locator.set_view_interval(*self.axis.get_view_interval()) - locator.set_data_interval(*self.axis.get_data_interval()) + locator.axis.set_view_interval(*self.axis.get_view_interval()) + locator.axis.set_data_interval(*self.axis.get_data_interval()) return locator return dates.AutoDateLocator.get_locator(self, dmin, dmax) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/arrays/sparse/test_array.py new/pandas-1.3.5/pandas/tests/arrays/sparse/test_array.py --- old/pandas-1.3.4/pandas/tests/arrays/sparse/test_array.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/arrays/sparse/test_array.py 2021-12-12 11:20:47.000000000 +0100 @@ -1204,7 +1204,7 @@ dtype="Sparse[int]", ) A, _, _ = ser.sparse.to_coo() - assert isinstance(A, scipy.sparse.coo.coo_matrix) + assert isinstance(A, scipy.sparse.coo_matrix) def test_non_sparse_raises(self): ser = pd.Series([1, 2, 3]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/frame/constructors/test_from_records.py new/pandas-1.3.5/pandas/tests/frame/constructors/test_from_records.py --- old/pandas-1.3.4/pandas/tests/frame/constructors/test_from_records.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/frame/constructors/test_from_records.py 2021-12-12 11:20:47.000000000 +0100 @@ -34,10 +34,7 @@ arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] dtypes = [("EXPIRY", "<M8[ns]")] - try: - recarray = np.core.records.fromarrays(arrdata, dtype=dtypes) - except (ValueError): - pytest.skip("known failure of numpy rec array creation") + recarray = np.core.records.fromarrays(arrdata, dtype=dtypes) result = DataFrame.from_records(recarray) tm.assert_frame_equal(result, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/frame/methods/test_quantile.py new/pandas-1.3.5/pandas/tests/frame/methods/test_quantile.py --- old/pandas-1.3.4/pandas/tests/frame/methods/test_quantile.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/frame/methods/test_quantile.py 2021-12-12 11:20:47.000000000 +0100 @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat.numpy import np_percentile_argname + import pandas as pd from pandas import ( DataFrame, @@ -153,7 +155,10 @@ # cross-check interpolation=nearest results in original dtype exp = np.percentile( - np.array([[1, 2, 3], [2, 3, 4]]), 0.5, axis=0, interpolation="nearest" + np.array([[1, 2, 3], [2, 3, 4]]), + 0.5, + axis=0, + **{np_percentile_argname: "nearest"}, ) expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="int64") tm.assert_series_equal(result, expected) @@ -167,7 +172,7 @@ np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]), 0.5, axis=0, - interpolation="nearest", + **{np_percentile_argname: "nearest"}, ) expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="float64") tm.assert_series_equal(result, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/frame/test_constructors.py new/pandas-1.3.5/pandas/tests/frame/test_constructors.py --- old/pandas-1.3.4/pandas/tests/frame/test_constructors.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/frame/test_constructors.py 2021-12-12 11:20:47.000000000 +0100 @@ -68,6 +68,19 @@ class TestDataFrameConstructors: + def test_constructor_dict_with_tzaware_scalar(self): + # GH#42505 + dt = Timestamp("2019-11-03 01:00:00-0700").tz_convert("America/Los_Angeles") + + df = DataFrame({"dt": dt}, index=[0]) + expected = DataFrame({"dt": [dt]}) + tm.assert_frame_equal(df, expected) + + # Non-homogeneous + df = DataFrame({"dt": dt, "value": [1]}) + expected = DataFrame({"dt": [dt], "value": [1]}) + tm.assert_frame_equal(df, expected) + def test_construct_ndarray_with_nas_and_int_dtype(self): # GH#26919 match Series by not casting np.nan to meaningless int arr = np.array([[1, np.nan], [2, 3]]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/groupby/test_function.py new/pandas-1.3.5/pandas/tests/groupby/test_function.py --- old/pandas-1.3.4/pandas/tests/groupby/test_function.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/groupby/test_function.py 2021-12-12 11:20:47.000000000 +0100 @@ -1105,3 +1105,27 @@ result = grouped.sum(min_count=2) expected = DataFrame({"b": [pd.NA] * 3, "c": [pd.NA] * 3}, dtype="Int64", index=idx) tm.assert_frame_equal(result, expected) + + +def test_groupby_sum_timedelta_with_nat(): + # GH#42659 + df = DataFrame( + { + "a": [1, 1, 2, 2], + "b": [pd.Timedelta("1d"), pd.Timedelta("2d"), pd.Timedelta("3d"), pd.NaT], + } + ) + td3 = pd.Timedelta(days=3) + + gb = df.groupby("a") + + res = gb.sum() + expected = DataFrame({"b": [td3, td3]}, index=Index([1, 2], name="a")) + tm.assert_frame_equal(res, expected) + + res = gb["b"].sum() + tm.assert_series_equal(res, expected["b"]) + + res = gb["b"].sum(min_count=2) + expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) + tm.assert_series_equal(res, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/indexes/datetimes/test_indexing.py new/pandas-1.3.5/pandas/tests/indexes/datetimes/test_indexing.py --- old/pandas-1.3.4/pandas/tests/indexes/datetimes/test_indexing.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/indexes/datetimes/test_indexing.py 2021-12-12 11:20:47.000000000 +0100 @@ -652,10 +652,6 @@ ([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]), ], ) - # FIXME: these warnings are flaky GH#36131 - @pytest.mark.filterwarnings( - "ignore:Comparison of Timestamp with datetime.date:FutureWarning" - ) def test_get_indexer_out_of_bounds_date(self, target, positions): values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/indexes/test_engines.py new/pandas-1.3.5/pandas/tests/indexes/test_engines.py --- old/pandas-1.3.4/pandas/tests/indexes/test_engines.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/indexes/test_engines.py 2021-12-12 11:20:47.000000000 +0100 @@ -61,13 +61,7 @@ @pytest.mark.parametrize( "scalar", [ - # error: Argument 1 to "Timestamp" has incompatible type "timedelta64"; - # expected "Union[integer[Any], float, str, date, datetime64]" - pd.Timestamp( - pd.Timedelta(days=42).asm8.view( - "datetime64[ns]" - ) # type: ignore[arg-type] - ), + pd.Timestamp(pd.Timedelta(days=42).asm8.view("datetime64[ns]")), pd.Timedelta(days=42).value, pd.Timedelta(days=42).to_pytimedelta(), pd.Timedelta(days=42).to_timedelta64(), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/io/parser/test_quoting.py new/pandas-1.3.5/pandas/tests/io/parser/test_quoting.py --- old/pandas-1.3.4/pandas/tests/io/parser/test_quoting.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/io/parser/test_quoting.py 2021-12-12 11:20:47.000000000 +0100 @@ -22,7 +22,7 @@ {"quotechar": None, "quoting": csv.QUOTE_MINIMAL}, "quotechar must be set if quoting enabled", ), - ({"quotechar": 2}, '"quotechar" must be string, not int'), + ({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'), ], ) def test_bad_quote_char(all_parsers, kwargs, msg): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/plotting/common.py new/pandas-1.3.5/pandas/tests/plotting/common.py --- old/pandas-1.3.4/pandas/tests/plotting/common.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/plotting/common.py 2021-12-12 11:15:39.000000000 +0100 @@ -45,6 +45,8 @@ from pandas.plotting._matplotlib import compat + self.compat = compat + mpl.rcdefaults() self.start_date_to_int64 = 812419200000000000 @@ -550,7 +552,7 @@ obj.plot(kind=kind, grid=False, **kws) assert not is_grid_on() - if kind != "pie": + if kind not in ["pie", "hexbin", "scatter"]: self.plt.subplot(1, 4 * len(kinds), spndx) spndx += 1 mpl.rc("axes", grid=True) @@ -569,6 +571,12 @@ """ return [v[field] for v in rcParams["axes.prop_cycle"]] + def get_x_axis(self, ax): + return ax._shared_axes["x"] if self.compat.mpl_ge_3_5_0() else ax._shared_x_axes + + def get_y_axis(self, ax): + return ax._shared_axes["y"] if self.compat.mpl_ge_3_5_0() else ax._shared_y_axes + def _check_plot_works(f, filterwarnings="always", default_axes=False, **kwargs): """ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/plotting/frame/test_frame.py new/pandas-1.3.5/pandas/tests/plotting/frame/test_frame.py --- old/pandas-1.3.4/pandas/tests/plotting/frame/test_frame.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/plotting/frame/test_frame.py 2021-12-12 11:15:39.000000000 +0100 @@ -525,8 +525,8 @@ df.plot(ax=ax1, kind="area") df.plot(ax=ax2, kind="area") - assert ax1._shared_y_axes.joined(ax1, ax2) - assert ax2._shared_y_axes.joined(ax1, ax2) + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) def test_bar_linewidth(self): df = DataFrame(np.random.randn(5, 5)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/plotting/test_common.py new/pandas-1.3.5/pandas/tests/plotting/test_common.py --- old/pandas-1.3.4/pandas/tests/plotting/test_common.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/plotting/test_common.py 2021-12-12 11:15:39.000000000 +0100 @@ -39,4 +39,6 @@ next(gen) axes = fig.get_axes() assert len(axes) == 1 - assert axes[0].get_geometry() == (2, 1, 2) + subplot_geometry = list(axes[0].get_subplotspec().get_geometry()[:-1]) + subplot_geometry[-1] += 1 + assert subplot_geometry == [2, 1, 2] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/plotting/test_datetimelike.py new/pandas-1.3.5/pandas/tests/plotting/test_datetimelike.py --- old/pandas-1.3.4/pandas/tests/plotting/test_datetimelike.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/plotting/test_datetimelike.py 2021-12-12 11:20:47.000000000 +0100 @@ -180,12 +180,7 @@ first_line = ax.get_lines()[0] first_x = first_line.get_xdata()[0].ordinal first_y = first_line.get_ydata()[0] - try: - assert expected_string == ax.format_coord(first_x, first_y) - except (ValueError): - pytest.skip( - "skipping test because issue forming test comparison GH7664" - ) + assert expected_string == ax.format_coord(first_x, first_y) annual = Series(1, index=date_range("2014-01-01", periods=3, freq="A-DEC")) _, ax = self.plt.subplots() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/plotting/test_hist_method.py new/pandas-1.3.5/pandas/tests/plotting/test_hist_method.py --- old/pandas-1.3.4/pandas/tests/plotting/test_hist_method.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/plotting/test_hist_method.py 2021-12-12 11:15:39.000000000 +0100 @@ -728,35 +728,35 @@ ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True) # share x - assert ax1._shared_x_axes.joined(ax1, ax2) - assert ax2._shared_x_axes.joined(ax1, ax2) + assert self.get_x_axis(ax1).joined(ax1, ax2) + assert self.get_x_axis(ax2).joined(ax1, ax2) # don't share y - assert not ax1._shared_y_axes.joined(ax1, ax2) - assert not ax2._shared_y_axes.joined(ax1, ax2) + assert not self.get_y_axis(ax1).joined(ax1, ax2) + assert not self.get_y_axis(ax2).joined(ax1, ax2) def test_axis_share_y(self): df = self.hist_df ax1, ax2 = df.hist(column="height", by=df.gender, sharey=True) # share y - assert ax1._shared_y_axes.joined(ax1, ax2) - assert ax2._shared_y_axes.joined(ax1, ax2) + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) # don't share x - assert not ax1._shared_x_axes.joined(ax1, ax2) - assert not ax2._shared_x_axes.joined(ax1, ax2) + assert not self.get_x_axis(ax1).joined(ax1, ax2) + assert not self.get_x_axis(ax2).joined(ax1, ax2) def test_axis_share_xy(self): df = self.hist_df ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True) # share both x and y - assert ax1._shared_x_axes.joined(ax1, ax2) - assert ax2._shared_x_axes.joined(ax1, ax2) + assert self.get_x_axis(ax1).joined(ax1, ax2) + assert self.get_x_axis(ax2).joined(ax1, ax2) - assert ax1._shared_y_axes.joined(ax1, ax2) - assert ax2._shared_y_axes.joined(ax1, ax2) + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) @pytest.mark.parametrize( "histtype, expected", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/plotting/test_series.py new/pandas-1.3.5/pandas/tests/plotting/test_series.py --- old/pandas-1.3.4/pandas/tests/plotting/test_series.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/plotting/test_series.py 2021-12-12 11:20:47.000000000 +0100 @@ -154,8 +154,8 @@ abs(self.ts).plot(ax=ax1, kind="area") abs(self.ts).plot(ax=ax2, kind="area") - assert ax1._shared_y_axes.joined(ax1, ax2) - assert ax2._shared_y_axes.joined(ax1, ax2) + assert self.get_y_axis(ax1).joined(ax1, ax2) + assert self.get_y_axis(ax2).joined(ax1, ax2) def test_label(self): s = Series([1, 2]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/resample/test_datetime_index.py new/pandas-1.3.5/pandas/tests/resample/test_datetime_index.py --- old/pandas-1.3.4/pandas/tests/resample/test_datetime_index.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/resample/test_datetime_index.py 2021-12-12 11:20:47.000000000 +0100 @@ -1827,3 +1827,27 @@ index=DatetimeIndex(["2020-03-31"], dtype="datetime64[ns]", freq="Q-DEC"), ) tm.assert_series_equal(result, expected) + + +def test_resample_unsigned_int(uint_dtype): + # gh-43329 + df = DataFrame( + index=date_range(start="2000-01-01", end="2000-01-03 23", freq="12H"), + columns=["x"], + data=[0, 1, 0] * 2, + dtype=uint_dtype, + ) + df = df.loc[(df.index < "2000-01-02") | (df.index > "2000-01-03"), :] + + if uint_dtype == "uint64": + with pytest.raises(RuntimeError, match="empty group with uint64_t"): + result = df.resample("D").max() + else: + result = df.resample("D").max() + + expected = DataFrame( + [1, np.nan, 0], + columns=["x"], + index=date_range(start="2000-01-01", end="2000-01-03 23", freq="D"), + ) + tm.assert_frame_equal(result, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/reshape/merge/test_merge_asof.py new/pandas-1.3.5/pandas/tests/reshape/merge/test_merge_asof.py --- old/pandas-1.3.4/pandas/tests/reshape/merge/test_merge_asof.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/reshape/merge/test_merge_asof.py 2021-12-12 11:20:47.000000000 +0100 @@ -1484,3 +1484,44 @@ match=r"Incompatible merge dtype, .*, both sides must have numeric dtype", ): merge_asof(left, right, left_on="a", right_on="a") + + +def test_merge_asof_array_as_on(): + # GH#42844 + right = pd.DataFrame( + { + "a": [2, 6], + "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], + } + ) + ts_merge = pd.date_range( + start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h" + ) + left = pd.DataFrame({"b": [4, 8, 7]}) + result = merge_asof( + left, + right, + left_on=ts_merge, + right_on="ts", + allow_exact_matches=False, + direction="backward", + ) + expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge}) + tm.assert_frame_equal(result, expected) + + result = merge_asof( + right, + left, + left_on="ts", + right_on=ts_merge, + allow_exact_matches=False, + direction="backward", + ) + expected = pd.DataFrame( + { + "a": [2, 6], + "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], + "b": [4, 8], + } + ) + tm.assert_frame_equal(result, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/series/methods/test_drop_duplicates.py new/pandas-1.3.5/pandas/tests/series/methods/test_drop_duplicates.py --- old/pandas-1.3.4/pandas/tests/series/methods/test_drop_duplicates.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/series/methods/test_drop_duplicates.py 2021-12-12 11:20:47.000000000 +0100 @@ -2,6 +2,7 @@ import pytest from pandas import ( + NA, Categorical, Series, ) @@ -224,6 +225,20 @@ assert return_value is None tm.assert_series_equal(sc, tc[~expected]) + def test_drop_duplicates_categorical_bool_na(self): + # GH#44351 + ser = Series( + Categorical( + [True, False, True, False, NA], categories=[True, False], ordered=True + ) + ) + result = ser.drop_duplicates() + expected = Series( + Categorical([True, False, np.nan], categories=[True, False], ordered=True), + index=[0, 1, 4], + ) + tm.assert_series_equal(result, expected) + def test_drop_duplicates_pos_args_deprecation(): # GH#41485 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/series/methods/test_duplicated.py new/pandas-1.3.5/pandas/tests/series/methods/test_duplicated.py --- old/pandas-1.3.4/pandas/tests/series/methods/test_duplicated.py 2021-10-17 14:27:28.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/series/methods/test_duplicated.py 2021-12-12 11:20:47.000000000 +0100 @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Series +from pandas import ( + NA, + Categorical, + Series, +) import pandas._testing as tm @@ -33,3 +37,15 @@ result = ser.duplicated(keep=keep) tm.assert_series_equal(result, expected) + + +def test_duplicated_categorical_bool_na(): + # GH#44351 + ser = Series( + Categorical( + [True, False, True, False, NA], categories=[True, False], ordered=True + ) + ) + result = ser.duplicated() + expected = Series([False, False, True, True, False]) + tm.assert_series_equal(result, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/series/methods/test_equals.py new/pandas-1.3.5/pandas/tests/series/methods/test_equals.py --- old/pandas-1.3.4/pandas/tests/series/methods/test_equals.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/series/methods/test_equals.py 2021-12-12 11:20:47.000000000 +0100 @@ -125,3 +125,18 @@ assert ser.equals(ser2) assert Index(ser).equals(Index(ser2)) assert ser.array.equals(ser2.array) + + +def test_equals_None_vs_float(): + # GH#44190 + left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object) + right = Series([None] * len(left)) + + # these series were found to be equal due to a bug, check that they are correctly + # found to not equal + assert not left.equals(right) + assert not right.equals(left) + assert not left.to_frame().equals(right.to_frame()) + assert not right.to_frame().equals(left.to_frame()) + assert not Index(left, dtype="object").equals(Index(right, dtype="object")) + assert not Index(right, dtype="object").equals(Index(left, dtype="object")) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas/tests/window/test_groupby.py new/pandas-1.3.5/pandas/tests/window/test_groupby.py --- old/pandas-1.3.4/pandas/tests/window/test_groupby.py 2021-10-17 14:27:57.000000000 +0200 +++ new/pandas-1.3.5/pandas/tests/window/test_groupby.py 2021-12-12 11:20:47.000000000 +0100 @@ -122,8 +122,33 @@ expected.index = expected_index tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]]) + def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val): + # GH 42915 + df = DataFrame( + {"value": range(10), "idx1": [1] * 5 + [2] * 5, "idx2": [1, 2, 3, 4, 5] * 2} + ).set_index(["idx1", "idx2"]) + other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index( + "idx2" + ) + result = getattr(df.groupby(level=0).rolling(2), f)(other) + expected_data = ([np.nan] + [expected_val] * 4) * 2 + expected = DataFrame( + expected_data, + columns=["value"], + index=MultiIndex.from_arrays( + [ + [1] * 5 + [2] * 5, + [1] * 5 + [2] * 5, + list(range(1, 6)) * 2, + ], + names=["idx1", "idx1", "idx2"], + ), + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("f", ["corr", "cov"]) - def test_rolling_corr_cov(self, f): + def test_rolling_corr_cov_other_diff_size_as_groups(self, f): g = self.frame.groupby("A") r = g.rolling(window=4) @@ -138,6 +163,11 @@ expected["A"] = np.nan tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("f", ["corr", "cov"]) + def test_rolling_corr_cov_pairwise(self, f): + g = self.frame.groupby("A") + r = g.rolling(window=4) + result = getattr(r.B, f)(pairwise=True) def func(x): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas.egg-info/PKG-INFO new/pandas-1.3.5/pandas.egg-info/PKG-INFO --- old/pandas-1.3.4/pandas.egg-info/PKG-INFO 2021-10-17 14:40:38.000000000 +0200 +++ new/pandas-1.3.5/pandas.egg-info/PKG-INFO 2021-12-12 11:23:37.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: pandas -Version: 1.3.4 +Version: 1.3.5 Summary: Powerful data structures for data analysis, time series, and statistics Home-page: https://pandas.pydata.org Author: The Pandas Development Team diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pandas-1.3.4/pandas.egg-info/SOURCES.txt new/pandas-1.3.5/pandas.egg-info/SOURCES.txt --- old/pandas-1.3.4/pandas.egg-info/SOURCES.txt 2021-10-17 14:40:38.000000000 +0200 +++ new/pandas-1.3.5/pandas.egg-info/SOURCES.txt 2021-12-12 11:23:37.000000000 +0100 @@ -246,6 +246,7 @@ doc/source/whatsnew/v1.3.2.rst doc/source/whatsnew/v1.3.3.rst doc/source/whatsnew/v1.3.4.rst +doc/source/whatsnew/v1.3.5.rst doc/sphinxext/README.rst doc/sphinxext/announce.py doc/sphinxext/contributors.py