The upstream patch doesn't even apply as-is; this version does, but I don't have time right now to actually test it.

There's also a circular dependency problem, as dask indirectly build-depends on itself and my new pandas makes it uninstallable.

Description: pandas 1.1 compatibility

Origin: part of upstream f212b76fefeb93298205d7d224cbc1f7ed387ce9
Author: Tom Augspurger, Rebecca Palmer

diff --git a/dask/dataframe/core.py b/dask/dataframe/core.py
index 4a5c6d1f..cedd46fc 100644
--- a/dask/dataframe/core.py
+++ b/dask/dataframe/core.py
@@ -2487,7 +2487,7 @@ Dask Name: {name}, {task} tasks"""
         else:
             is_anchored = offset.isAnchored()

-        include_right = is_anchored or not hasattr(offset, "_inc")
+        include_right = is_anchored or not hasattr(offset, "delta")

         if end == self.npartitions - 1:
             divs = self.divisions
@@ -4106,7 +4106,7 @@ class DataFrame(_Frame):
             left_index=on is None,
             right_index=True,
             left_on=on,
-            suffixes=[lsuffix, rsuffix],
+            suffixes=(lsuffix, rsuffix),
             npartitions=npartitions,
             shuffle=shuffle,
         )
diff --git a/dask/dataframe/tests/test_dataframe.py b/dask/dataframe/tests/test_dataframe.py
index 64c15000..5e4f2bef 100644
--- a/dask/dataframe/tests/test_dataframe.py
+++ b/dask/dataframe/tests/test_dataframe.py
@@ -37,6 +37,9 @@ dsk = {
 meta = make_meta({"a": "i8", "b": "i8"}, index=pd.Index([], "i8"))
 d = dd.DataFrame(dsk, "x", meta, [0, 5, 9, 9])
 full = d.compute()
+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False


 def test_dataframe_doc():
@@ -222,7 +225,18 @@ def test_index_names():
     assert ddf.index.compute().name == "x"


[email protected]("npartitions", [1, pytest.param(2, marks=pytest.mark.xfail)])
[email protected](
+    "npartitions",
+    [
+        1,
+        pytest.param(
+            2,
+            marks=pytest.mark.xfail(
+                not dd._compat.PANDAS_GT_110, reason="Fixed upstream."
+            ),
+        ),
+    ],
+)
 def test_timezone_freq(npartitions):
     s_naive = pd.Series(pd.date_range("20130101", periods=10))
     s_aware = pd.Series(pd.date_range("20130101", periods=10, tz="US/Eastern"))
@@ -385,12 +399,48 @@ def test_describe_numeric(method, test_values):
         (None, None, None, ["c", "d", "g"]),  # numeric + bool
         (None, None, None, ["c", "d", "f", "g"]),  # numeric + bool + timedelta
         (None, None, None, ["f", "g"]),  # bool + timedelta
-        ("all", None, None, None),
-        (["number"], None, [0.25, 0.5], None),
-        ([np.timedelta64], None, None, None),
-        (["number", "object"], None, [0.25, 0.75], None),
-        (None, ["number", "object"], None, None),
-        (["object", "datetime", "bool"], None, None, None),
+        pytest.param(
+            "all",
+            None,
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            ["number"],
+            None,
+            [0.25, 0.5],
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            [np.timedelta64],
+            None,
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            ["number", "object"],
+            None,
+            [0.25, 0.75],
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            None,
+            ["number", "object"],
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            ["object", "datetime", "bool"],
+            None,
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
     ],
 )
 def test_describe(include, exclude, percentiles, subset):
@@ -2522,15 +2572,17 @@ def test_to_timestamp():
     index = pd.period_range(freq="A", start="1/1/2001", end="12/1/2004")
     df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [10, 20, 30, 40]}, index=index)
     ddf = dd.from_pandas(df, npartitions=3)
-    assert_eq(ddf.to_timestamp(), df.to_timestamp())
+    assert_eq(ddf.to_timestamp(), df.to_timestamp(), **CHECK_FREQ)
     assert_eq(
         ddf.to_timestamp(freq="M", how="s").compute(),
         df.to_timestamp(freq="M", how="s"),
+        **CHECK_FREQ
     )
     assert_eq(ddf.x.to_timestamp(), df.x.to_timestamp())
     assert_eq(
         ddf.x.to_timestamp(freq="M", how="s").compute(),
         df.x.to_timestamp(freq="M", how="s"),
+        **CHECK_FREQ
     )


diff --git a/dask/dataframe/tests/test_extensions.py b/dask/dataframe/tests/test_extensions.py
index bc83784a..c69bcd06 100644
--- a/dask/dataframe/tests/test_extensions.py
+++ b/dask/dataframe/tests/test_extensions.py
@@ -41,7 +41,11 @@ def test_reduction():
     dser = dd.from_pandas(ser, 2)
     assert_eq(ser.mean(skipna=False), dser.mean(skipna=False))

-    assert_eq(ser.to_frame().mean(skipna=False), dser.to_frame().mean(skipna=False))
+    # It's unclear whether this can be reliably provided, at least with the current
+    # implementation, which uses pandas.DataFrame.sum(), returning a (homogenous)
+    # series which has potentially cast values.
+
+    # assert_eq(ser.to_frame().mean(skipna=False), dser.to_frame().mean(skipna=False))


 def test_scalar():
diff --git a/dask/dataframe/tests/test_indexing.py b/dask/dataframe/tests/test_indexing.py
index 2348b89f..88939db4 100644
--- a/dask/dataframe/tests/test_indexing.py
+++ b/dask/dataframe/tests/test_indexing.py
@@ -19,6 +19,9 @@ dsk = {
 meta = make_meta({"a": "i8", "b": "i8"}, index=pd.Index([], "i8"))
 d = dd.DataFrame(dsk, "x", meta, [0, 5, 9, 9])
 full = d.compute()
+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False


 def test_loc():
@@ -369,24 +372,35 @@ def test_loc_timestamp_str():
     assert_eq(df.loc["2011-01-02"], ddf.loc["2011-01-02"])
     assert_eq(df.loc["2011-01-02":"2011-01-10"], ddf.loc["2011-01-02":"2011-01-10"])
     # same reso, dask result is always DataFrame
-    assert_eq(df.loc["2011-01-02 10:00"].to_frame().T, ddf.loc["2011-01-02 10:00"])
+    assert_eq(
+        df.loc["2011-01-02 10:00"].to_frame().T,
+        ddf.loc["2011-01-02 10:00"],
+        **CHECK_FREQ
+    )

     # series
-    assert_eq(df.A.loc["2011-01-02"], ddf.A.loc["2011-01-02"])
-    assert_eq(df.A.loc["2011-01-02":"2011-01-10"], ddf.A.loc["2011-01-02":"2011-01-10"])
+    assert_eq(df.A.loc["2011-01-02"], ddf.A.loc["2011-01-02"], **CHECK_FREQ)
+    assert_eq(
+        df.A.loc["2011-01-02":"2011-01-10"],
+        ddf.A.loc["2011-01-02":"2011-01-10"],
+        **CHECK_FREQ
+    )

     # slice with timestamp (dask result must be DataFrame)
     assert_eq(
         df.loc[pd.Timestamp("2011-01-02")].to_frame().T,
         ddf.loc[pd.Timestamp("2011-01-02")],
+        **CHECK_FREQ
     )
     assert_eq(
         df.loc[pd.Timestamp("2011-01-02") : pd.Timestamp("2011-01-10")],
         ddf.loc[pd.Timestamp("2011-01-02") : pd.Timestamp("2011-01-10")],
+        **CHECK_FREQ
     )
     assert_eq(
         df.loc[pd.Timestamp("2011-01-02 10:00")].to_frame().T,
         ddf.loc[pd.Timestamp("2011-01-02 10:00")],
+        **CHECK_FREQ
     )

     df = pd.DataFrame(
diff --git a/dask/dataframe/tests/test_rolling.py b/dask/dataframe/tests/test_rolling.py
index 81d8f498..948e1fa5 100644
--- a/dask/dataframe/tests/test_rolling.py
+++ b/dask/dataframe/tests/test_rolling.py
@@ -4,6 +4,7 @@ import pandas as pd
 import pytest
 import numpy as np

+import dask.array as da
 import dask.dataframe as dd
 from dask.dataframe.utils import assert_eq, PANDAS_VERSION

@@ -139,6 +140,10 @@ rolling_method_args_check_less_precise = [
 @pytest.mark.parametrize("window", [1, 2, 4, 5])
 @pytest.mark.parametrize("center", [True, False])
def test_rolling_methods(method, args, window, center, check_less_precise):
+    if dd._compat.PANDAS_GT_110:
+        check_less_precise = {}
+    else:
+        check_less_precise = {"check_less_precise": check_less_precise}
     # DataFrame
     prolling = df.rolling(window, center=center)
     drolling = ddf.rolling(window, center=center)
@@ -150,7 +155,7 @@ def test_rolling_methods(method, args, window, center, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )

     # Series
@@ -159,7 +164,7 @@ def test_rolling_methods(method, args, window, center, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )


@@ -264,6 +269,11 @@ def test_time_rolling_constructor():
 )
@pytest.mark.parametrize("window", ["1S", "2S", "3S", pd.offsets.Second(5)])
 def test_time_rolling_methods(method, args, window, check_less_precise):
+    if dd._compat.PANDAS_GT_110:
+        check_less_precise = {}
+    else:
+        check_less_precise = {"check_less_precise": check_less_precise}
+
     # DataFrame
     if method == "apply":
         kwargs = {"raw": False}
@@ -274,7 +284,7 @@ def test_time_rolling_methods(method, args, window, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )

     # Series
@@ -283,7 +293,7 @@ def test_time_rolling_methods(method, args, window, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )


diff --git a/dask/dataframe/tests/test_shuffle.py b/dask/dataframe/tests/test_shuffle.py
index 63a65737..39f5ccd7 100644
--- a/dask/dataframe/tests/test_shuffle.py
+++ b/dask/dataframe/tests/test_shuffle.py
@@ -36,6 +35,9 @@ dsk = {
 meta = make_meta({"a": "i8", "b": "i8"}, index=pd.Index([], "i8"))
 d = dd.DataFrame(dsk, "x", meta, [0, 4, 9, 9])
 full = d.compute()
+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False


 shuffle_func = shuffle  # conflicts with keyword argument
@@ -772,7 +774,7 @@ def test_set_index_on_empty():
         ddf = ddf[ddf.y > df.y.max()].set_index("x")
         expected_df = df[df.y > df.y.max()].set_index("x")

-        assert assert_eq(ddf, expected_df)
+        assert assert_eq(ddf, expected_df, **CHECK_FREQ)
         assert ddf.npartitions == 1


@@ -916,8 +918,8 @@ def test_set_index_timestamp():
         assert ts1.value == ts2.value
         assert ts1.tz == ts2.tz

-    assert_eq(df2, ddf_new_div)
-    assert_eq(df2, ddf.set_index("A"))
+    assert_eq(df2, ddf_new_div, **CHECK_FREQ)
+    assert_eq(df2, ddf.set_index("A"), **CHECK_FREQ)


 @pytest.mark.parametrize("compression", [None, "ZLib"])
diff --git a/dask/dataframe/tests/test_utils_dataframe.py b/dask/dataframe/tests/test_utils_dataframe.py
index ffbebb69..fa6a6625 100644
--- a/dask/dataframe/tests/test_utils_dataframe.py
+++ b/dask/dataframe/tests/test_utils_dataframe.py
@@ -129,7 +129,7 @@ def test_meta_nonempty():
             "E": np.int32(1),
             "F": pd.Timestamp("2016-01-01"),
             "G": pd.date_range("2016-01-01", periods=3, tz="America/New_York"),
-            "H": pd.Timedelta("1 hours", "ms"),
+            "H": pd.Timedelta("1 hours"),
             "I": np.void(b" "),
             "J": pd.Categorical([UNKNOWN_CATEGORIES] * 3),
         },
@@ -147,7 +147,7 @@ def test_meta_nonempty():
     assert df3["E"][0].dtype == "i4"
     assert df3["F"][0] == pd.Timestamp("1970-01-01 00:00:00")
     assert df3["G"][0] == pd.Timestamp("1970-01-01 00:00:00", tz="America/New_York")
-    assert df3["H"][0] == pd.Timedelta("1", "ms")
+    assert df3["H"][0] == pd.Timedelta("1")
     assert df3["I"][0] == "foo"
     assert df3["J"][0] == UNKNOWN_CATEGORIES

diff --git a/dask/dataframe/tseries/tests/test_resample.py b/dask/dataframe/tseries/tests/test_resample.py
index 327b4392..ee24313e 100644
--- a/dask/dataframe/tseries/tests/test_resample.py
+++ b/dask/dataframe/tseries/tests/test_resample.py
@@ -7,6 +7,10 @@ from dask.dataframe.utils import assert_eq, PANDAS_VERSION
 from dask.dataframe._compat import PANDAS_GT_0240
 import dask.dataframe as dd

+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False
+

 def resample(df, freq, how="mean", **kwargs):
     return getattr(df.resample(freq, **kwargs), how)()
@@ -195,7 +199,7 @@ def test_series_resample_non_existent_datetime():
     result = ddf.resample("1D").mean()
     expected = df.resample("1D").mean()

-    assert_eq(result, expected)
+    assert_eq(result, expected, **CHECK_FREQ)


@pytest.mark.skipif(PANDAS_VERSION <= "0.23.4", reason="quantile not in 0.23")

Reply via email to