The upstream patch doesn't even apply as-is; this version does, but I don't have time right now to actually test it.

There's also a circular dependency problem, as dask indirectly build-depends on itself and my new pandas makes it uninstallable.

Description: pandas 1.1 compatibility

Origin: part of upstream f212b76fefeb93298205d7d224cbc1f7ed387ce9
Author: Tom Augspurger, Rebecca Palmer

diff --git a/dask/dataframe/core.py b/dask/dataframe/core.py
index 4a5c6d1f..cedd46fc 100644
--- a/dask/dataframe/core.py
+++ b/dask/dataframe/core.py
@@ -2487,7 +2487,7 @@ Dask Name: {name}, {task} tasks"""
         else:
             is_anchored = offset.isAnchored()

-        include_right = is_anchored or not hasattr(offset, "_inc")
+        include_right = is_anchored or not hasattr(offset, "delta")

         if end == self.npartitions - 1:
             divs = self.divisions
@@ -4106,7 +4106,7 @@ class DataFrame(_Frame):
             left_index=on is None,
             right_index=True,
             left_on=on,
-            suffixes=[lsuffix, rsuffix],
+            suffixes=(lsuffix, rsuffix),
             npartitions=npartitions,
             shuffle=shuffle,
         )
diff --git a/dask/dataframe/tests/test_dataframe.py b/dask/dataframe/tests/test_dataframe.py
index 64c15000..5e4f2bef 100644
--- a/dask/dataframe/tests/test_dataframe.py
+++ b/dask/dataframe/tests/test_dataframe.py
@@ -37,6 +37,9 @@ dsk = {
 meta = make_meta({"a": "i8", "b": "i8"}, index=pd.Index([], "i8"))
 d = dd.DataFrame(dsk, "x", meta, [0, 5, 9, 9])
 full = d.compute()
+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False


 def test_dataframe_doc():
@@ -222,7 +225,18 @@ def test_index_names():
     assert ddf.index.compute().name == "x"


[email protected]("npartitions", [1, pytest.param(2, marks=pytest.mark.xfail)])
[email protected](
+    "npartitions",
+    [
+        1,
+        pytest.param(
+            2,
+            marks=pytest.mark.xfail(
+                not dd._compat.PANDAS_GT_110, reason="Fixed upstream."
+            ),
+        ),
+    ],
+)
 def test_timezone_freq(npartitions):
     s_naive = pd.Series(pd.date_range("20130101", periods=10))
     s_aware = pd.Series(pd.date_range("20130101", periods=10, tz="US/Eastern"))
@@ -385,12 +399,48 @@ def test_describe_numeric(method, test_values):
         (None, None, None, ["c", "d", "g"]),  # numeric + bool
         (None, None, None, ["c", "d", "f", "g"]),  # numeric + bool + timedelta
         (None, None, None, ["f", "g"]),  # bool + timedelta
-        ("all", None, None, None),
-        (["number"], None, [0.25, 0.5], None),
-        ([np.timedelta64], None, None, None),
-        (["number", "object"], None, [0.25, 0.75], None),
-        (None, ["number", "object"], None, None),
-        (["object", "datetime", "bool"], None, None, None),
+        pytest.param(
+            "all",
+            None,
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            ["number"],
+            None,
+            [0.25, 0.5],
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            [np.timedelta64],
+            None,
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            ["number", "object"],
+            None,
+            [0.25, 0.75],
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            None,
+            ["number", "object"],
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
+        pytest.param(
+            ["object", "datetime", "bool"],
+            None,
+            None,
+            None,
+            marks=pytest.mark.xfail(PANDAS_GT_110, reason="upstream changes"),
+        ),
     ],
 )
 def test_describe(include, exclude, percentiles, subset):
@@ -2522,15 +2572,17 @@ def test_to_timestamp():
     index = pd.period_range(freq="A", start="1/1/2001", end="12/1/2004")
     df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [10, 20, 30, 40]}, index=index)
     ddf = dd.from_pandas(df, npartitions=3)
-    assert_eq(ddf.to_timestamp(), df.to_timestamp())
+    assert_eq(ddf.to_timestamp(), df.to_timestamp(), **CHECK_FREQ)
     assert_eq(
         ddf.to_timestamp(freq="M", how="s").compute(),
         df.to_timestamp(freq="M", how="s"),
+        **CHECK_FREQ
     )
     assert_eq(ddf.x.to_timestamp(), df.x.to_timestamp())
     assert_eq(
         ddf.x.to_timestamp(freq="M", how="s").compute(),
         df.x.to_timestamp(freq="M", how="s"),
+        **CHECK_FREQ
     )


diff --git a/dask/dataframe/tests/test_extensions.py b/dask/dataframe/tests/test_extensions.py
index bc83784a..c69bcd06 100644
--- a/dask/dataframe/tests/test_extensions.py
+++ b/dask/dataframe/tests/test_extensions.py
@@ -41,7 +41,11 @@ def test_reduction():
     dser = dd.from_pandas(ser, 2)
     assert_eq(ser.mean(skipna=False), dser.mean(skipna=False))

-    assert_eq(ser.to_frame().mean(skipna=False), dser.to_frame().mean(skipna=False))
+    # It's unclear whether this can be reliably provided, at least with the current
+    # implementation, which uses pandas.DataFrame.sum(), returning a (homogenous)
+    # series which has potentially cast values.
+
+    # assert_eq(ser.to_frame().mean(skipna=False), dser.to_frame().mean(skipna=False))


 def test_scalar():
diff --git a/dask/dataframe/tests/test_indexing.py b/dask/dataframe/tests/test_indexing.py
index 2348b89f..88939db4 100644
--- a/dask/dataframe/tests/test_indexing.py
+++ b/dask/dataframe/tests/test_indexing.py
@@ -19,6 +19,9 @@ dsk = {
 meta = make_meta({"a": "i8", "b": "i8"}, index=pd.Index([], "i8"))
 d = dd.DataFrame(dsk, "x", meta, [0, 5, 9, 9])
 full = d.compute()
+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False


 def test_loc():
@@ -369,24 +372,35 @@ def test_loc_timestamp_str():
     assert_eq(df.loc["2011-01-02"], ddf.loc["2011-01-02"])
     assert_eq(df.loc["2011-01-02":"2011-01-10"], ddf.loc["2011-01-02":"2011-01-10"])
     # same reso, dask result is always DataFrame
-    assert_eq(df.loc["2011-01-02 10:00"].to_frame().T, ddf.loc["2011-01-02 10:00"])
+    assert_eq(
+        df.loc["2011-01-02 10:00"].to_frame().T,
+        ddf.loc["2011-01-02 10:00"],
+        **CHECK_FREQ
+    )

     # series
-    assert_eq(df.A.loc["2011-01-02"], ddf.A.loc["2011-01-02"])
-    assert_eq(df.A.loc["2011-01-02":"2011-01-10"], ddf.A.loc["2011-01-02":"2011-01-10"])
+    assert_eq(df.A.loc["2011-01-02"], ddf.A.loc["2011-01-02"], **CHECK_FREQ)
+    assert_eq(
+        df.A.loc["2011-01-02":"2011-01-10"],
+        ddf.A.loc["2011-01-02":"2011-01-10"],
+        **CHECK_FREQ
+    )

     # slice with timestamp (dask result must be DataFrame)
     assert_eq(
         df.loc[pd.Timestamp("2011-01-02")].to_frame().T,
         ddf.loc[pd.Timestamp("2011-01-02")],
+        **CHECK_FREQ
     )
     assert_eq(
         df.loc[pd.Timestamp("2011-01-02") : pd.Timestamp("2011-01-10")],
         ddf.loc[pd.Timestamp("2011-01-02") : pd.Timestamp("2011-01-10")],
+        **CHECK_FREQ
     )
     assert_eq(
         df.loc[pd.Timestamp("2011-01-02 10:00")].to_frame().T,
         ddf.loc[pd.Timestamp("2011-01-02 10:00")],
+        **CHECK_FREQ
     )

     df = pd.DataFrame(
diff --git a/dask/dataframe/tests/test_rolling.py b/dask/dataframe/tests/test_rolling.py
index 81d8f498..948e1fa5 100644
--- a/dask/dataframe/tests/test_rolling.py
+++ b/dask/dataframe/tests/test_rolling.py
@@ -4,6 +4,7 @@ import pandas as pd
 import pytest
 import numpy as np

+import dask.array as da
 import dask.dataframe as dd
 from dask.dataframe.utils import assert_eq, PANDAS_VERSION

@@ -139,6 +140,10 @@ rolling_method_args_check_less_precise = [
 @pytest.mark.parametrize("window", [1, 2, 4, 5])
 @pytest.mark.parametrize("center", [True, False])
def test_rolling_methods(method, args, window, center, check_less_precise):
+    if dd._compat.PANDAS_GT_110:
+        check_less_precise = {}
+    else:
+        check_less_precise = {"check_less_precise": check_less_precise}
     # DataFrame
     prolling = df.rolling(window, center=center)
     drolling = ddf.rolling(window, center=center)
@@ -150,7 +155,7 @@ def test_rolling_methods(method, args, window, center, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )

     # Series
@@ -159,7 +164,7 @@ def test_rolling_methods(method, args, window, center, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )


@@ -264,6 +269,11 @@ def test_time_rolling_constructor():
 )
@pytest.mark.parametrize("window", ["1S", "2S", "3S", pd.offsets.Second(5)])
 def test_time_rolling_methods(method, args, window, check_less_precise):
+    if dd._compat.PANDAS_GT_110:
+        check_less_precise = {}
+    else:
+        check_less_precise = {"check_less_precise": check_less_precise}
+
     # DataFrame
     if method == "apply":
         kwargs = {"raw": False}
@@ -274,7 +284,7 @@ def test_time_rolling_methods(method, args, window, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )

     # Series
@@ -283,7 +293,7 @@ def test_time_rolling_methods(method, args, window, check_less_precise):
     assert_eq(
         getattr(prolling, method)(*args, **kwargs),
         getattr(drolling, method)(*args, **kwargs),
-        check_less_precise=check_less_precise,
+        **check_less_precise,
     )


diff --git a/dask/dataframe/tests/test_shuffle.py b/dask/dataframe/tests/test_shuffle.py
index 63a65737..39f5ccd7 100644
--- a/dask/dataframe/tests/test_shuffle.py
+++ b/dask/dataframe/tests/test_shuffle.py
@@ -36,6 +35,9 @@ dsk = {
 meta = make_meta({"a": "i8", "b": "i8"}, index=pd.Index([], "i8"))
 d = dd.DataFrame(dsk, "x", meta, [0, 4, 9, 9])
 full = d.compute()
+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False


 shuffle_func = shuffle  # conflicts with keyword argument
@@ -772,7 +774,7 @@ def test_set_index_on_empty():
         ddf = ddf[ddf.y > df.y.max()].set_index("x")
         expected_df = df[df.y > df.y.max()].set_index("x")

-        assert assert_eq(ddf, expected_df)
+        assert assert_eq(ddf, expected_df, **CHECK_FREQ)
         assert ddf.npartitions == 1


@@ -916,8 +918,8 @@ def test_set_index_timestamp():
         assert ts1.value == ts2.value
         assert ts1.tz == ts2.tz

-    assert_eq(df2, ddf_new_div)
-    assert_eq(df2, ddf.set_index("A"))
+    assert_eq(df2, ddf_new_div, **CHECK_FREQ)
+    assert_eq(df2, ddf.set_index("A"), **CHECK_FREQ)


 @pytest.mark.parametrize("compression", [None, "ZLib"])
diff --git a/dask/dataframe/tests/test_utils_dataframe.py b/dask/dataframe/tests/test_utils_dataframe.py
index ffbebb69..fa6a6625 100644
--- a/dask/dataframe/tests/test_utils_dataframe.py
+++ b/dask/dataframe/tests/test_utils_dataframe.py
@@ -129,7 +129,7 @@ def test_meta_nonempty():
             "E": np.int32(1),
             "F": pd.Timestamp("2016-01-01"),
             "G": pd.date_range("2016-01-01", periods=3, tz="America/New_York"),
-            "H": pd.Timedelta("1 hours", "ms"),
+            "H": pd.Timedelta("1 hours"),
             "I": np.void(b" "),
             "J": pd.Categorical([UNKNOWN_CATEGORIES] * 3),
         },
@@ -147,7 +147,7 @@ def test_meta_nonempty():
     assert df3["E"][0].dtype == "i4"
     assert df3["F"][0] == pd.Timestamp("1970-01-01 00:00:00")
     assert df3["G"][0] == pd.Timestamp("1970-01-01 00:00:00", tz="America/New_York")
-    assert df3["H"][0] == pd.Timedelta("1", "ms")
+    assert df3["H"][0] == pd.Timedelta("1")
     assert df3["I"][0] == "foo"
     assert df3["J"][0] == UNKNOWN_CATEGORIES

diff --git a/dask/dataframe/tseries/tests/test_resample.py b/dask/dataframe/tseries/tests/test_resample.py
index 327b4392..ee24313e 100644
--- a/dask/dataframe/tseries/tests/test_resample.py
+++ b/dask/dataframe/tseries/tests/test_resample.py
@@ -7,6 +7,10 @@ from dask.dataframe.utils import assert_eq, PANDAS_VERSION
 from dask.dataframe._compat import PANDAS_GT_0240
 import dask.dataframe as dd

+CHECK_FREQ = {}
+if dd._compat.PANDAS_GT_110:
+    CHECK_FREQ["check_freq"] = False
+

 def resample(df, freq, how="mean", **kwargs):
     return getattr(df.resample(freq, **kwargs), how)()
@@ -195,7 +199,7 @@ def test_series_resample_non_existent_datetime():
     result = ddf.resample("1D").mean()
     expected = df.resample("1D").mean()

-    assert_eq(result, expected)
+    assert_eq(result, expected, **CHECK_FREQ)


@pytest.mark.skipif(PANDAS_VERSION <= "0.23.4", reason="quantile not in 0.23")

Reply via email to