Hello community,

here is the log from the commit of package python-dask for openSUSE:Factory checked in at 2019-12-09 21:35:45
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-dask (Old)
 and      /work/SRC/openSUSE:Factory/.python-dask.new.4691 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask"

Mon Dec 9 21:35:45 2019 rev:25 rq:755171 version:2.9.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-dask/python-dask.changes	2019-11-30 10:40:48.708147466 +0100
+++ /work/SRC/openSUSE:Factory/.python-dask.new.4691/python-dask.changes	2019-12-09 21:36:03.158078558 +0100
@@ -1,0 +2,29 @@
+Sat Dec 7 19:08:29 UTC 2019 - Arun Persaud <[email protected]>
+
+- update to version 2.9.0:
+  * Array
+    + Fix da.std to work with NumPy arrays (:pr:`5681`) James Bourbeau
+  * Core
+    + Register sizeof functions for Numba and RMM (:pr:`5668`) John A
+      Kirkham
+    + Update meeting time (:pr:`5682`) Tom Augspurger
+  * DataFrame
+    + Modify dd.DataFrame.drop to use shallow copy (:pr:`5675`)
+      Richard J Zamora
+    + Fix bug in _get_md_row_groups (:pr:`5673`) Richard J Zamora
+    + Close sqlalchemy engine after querying DB (:pr:`5629`) Krishan
+      Bhasin
+    + Allow dd.map_partitions to not enforce meta (:pr:`5660`) Matthew
+      Rocklin
+    + Generalize concat_unindexed_dataframes to support cudf-backend
+      (:pr:`5659`) Richard J Zamora
+    + Add dataframe resample methods (:pr:`5636`) Ben Zaitlen
+    + Compute length of dataframe as length of first column
+      (:pr:`5635`) Matthew Rocklin
+  * Documentation
+    + Doc fixup (:pr:`5665`) James Bourbeau
+    + Update doc build instructions (:pr:`5640`) James Bourbeau
+    + Fix ADL link (:pr:`5639`) Ray Bell
+    + Add documentation build (:pr:`5617`) James Bourbeau
+
+-------------------------------------------------------------------

Old:
----
  dask-2.8.1.tar.gz

New:
----
  dask-2.9.0.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-dask.spec ++++++
--- /var/tmp/diff_new_pack.dDBzzg/_old	2019-12-09 21:36:04.170078159 +0100
+++ /var/tmp/diff_new_pack.dDBzzg/_new	2019-12-09 21:36:04.174078158 +0100
@@ -27,7 +27,7 @@
 %endif
 %define skip_python2 1
 Name: python-dask%{psuffix}
-Version: 2.8.1
+Version: 2.9.0
 Release: 0
 Summary: Minimal task scheduling abstraction
 License: BSD-3-Clause

++++++ dask-2.8.1.tar.gz -> dask-2.9.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/PKG-INFO new/dask-2.9.0/PKG-INFO
--- old/dask-2.8.1/PKG-INFO	2019-11-23 05:31:55.000000000 +0100
+++ new/dask-2.9.0/PKG-INFO	2019-12-06 22:48:02.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dask
-Version: 2.8.1
+Version: 2.9.0
 Summary: Parallel PyData with Task Scheduling
 Home-page: https://github.com/dask/dask/
 Maintainer: Matthew Rocklin
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/_version.py new/dask-2.9.0/dask/_version.py
--- old/dask-2.8.1/dask/_version.py	2019-11-23 05:31:55.000000000 +0100
+++ new/dask-2.9.0/dask/_version.py	2019-12-06 22:48:02.000000000 +0100
@@ -11,8 +11,8 @@
 {
  "dirty": false,
  "error": null,
- "full-revisionid": "eee9b78da60c24897e1df984f01dd9f36245fcb1",
- "version": "2.8.1"
+ "full-revisionid": "5a96ec7c04877487c7c6ae4f9bb1802566f7e36e",
+ "version": "2.9.0"
 }
 ''' # END VERSION_JSON
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/array/reductions.py new/dask-2.9.0/dask/array/reductions.py
--- old/dask-2.8.1/dask/array/reductions.py	2019-11-20 02:10:36.000000000 +0100
+++ new/dask-2.9.0/dask/array/reductions.py	2019-12-06 22:32:26.000000000 +0100
@@ -813,7 +813,8 @@
 @wraps(chunk.std)
 def std(a, axis=None, dtype=None, keepdims=False, ddof=0, split_every=None, out=None):
     result = sqrt(
-
a.var( + var( + a, axis=axis, dtype=dtype, keepdims=keepdims, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/array/tests/test_reductions.py new/dask-2.9.0/dask/array/tests/test_reductions.py --- old/dask-2.8.1/dask/array/tests/test_reductions.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/array/tests/test_reductions.py 2019-12-06 22:32:26.000000000 +0100 @@ -49,6 +49,9 @@ def reduction_1d_test(da_func, darr, np_func, narr, use_dtype=True, split_every=True): assert_eq(da_func(darr), np_func(narr)) + assert_eq( + da_func(narr), np_func(narr) + ) # Ensure Dask reductions work with NumPy arrays assert_eq(da_func(darr, keepdims=True), np_func(narr, keepdims=True)) assert_eq(da_func(darr, axis=()), np_func(narr, axis=())) assert same_keys(da_func(darr), da_func(darr)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/core.py new/dask-2.9.0/dask/dataframe/core.py --- old/dask-2.8.1/dask/dataframe/core.py 2019-11-23 05:10:40.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/core.py 2019-12-06 22:32:26.000000000 +0100 @@ -70,6 +70,7 @@ valid_divisions, hash_object_dispatch, check_matching_columns, + drop_by_shallow_copy, ) no_default = "__no_default__" @@ -417,13 +418,19 @@ def index(self): """Return dask Index instance""" return self.map_partitions( - getattr, "index", token=self._name + "-index", meta=self._meta.index + getattr, + "index", + token=self._name + "-index", + meta=self._meta.index, + enforce_metadata=False, ) @index.setter def index(self, value): self.divisions = value.divisions - result = map_partitions(methods.assign_index, self, value) + result = map_partitions( + methods.assign_index, self, value, enforce_metadata=False + ) self.dask = result.dask self._name = result._name self._meta = result._meta @@ -448,7 +455,9 @@ drop : boolean, default False Do not try to insert index into dataframe columns. 
""" - return self.map_partitions(M.reset_index, drop=drop).clear_divisions() + return self.map_partitions( + M.reset_index, drop=drop, enforce_metadata=False + ).clear_divisions() @property def known_divisions(self): @@ -1155,6 +1164,7 @@ limit=limit, axis=axis, meta=meta, + enforce_metadata=False, **kwargs ) @@ -1247,7 +1257,11 @@ @derived_from(pd.DataFrame) def replace(self, to_replace=None, value=None, regex=False): return self.map_partitions( - M.replace, to_replace=to_replace, value=value, regex=regex + M.replace, + to_replace=to_replace, + value=value, + regex=regex, + enforce_metadata=False, ) def to_dask_array(self, lengths=None): @@ -1271,7 +1285,7 @@ ------- """ if lengths is True: - lengths = tuple(self.map_partitions(len).compute()) + lengths = tuple(self.map_partitions(len, enforce_metadata=False).compute()) arr = self.values @@ -1401,7 +1415,9 @@ raise TypeError("periods must be an integer") if axis == 1: - return self.map_partitions(M.diff, token="diff", periods=periods, axis=1) + return self.map_partitions( + M.diff, token="diff", periods=periods, axis=1, enforce_metadata=False + ) before, after = (periods, 0) if periods > 0 else (0, -periods) return self.map_overlap(M.diff, before, after, token="diff", periods=periods) @@ -1414,7 +1430,12 @@ if axis == 1: return self.map_partitions( - M.shift, token="shift", periods=periods, freq=freq, axis=1 + M.shift, + token="shift", + periods=periods, + freq=freq, + axis=1, + enforce_metadata=False, ) if freq is None: @@ -1431,6 +1452,7 @@ periods=periods, freq=freq, meta=meta, + enforce_metadata=False, transform_divisions=False, ) return maybe_shift_divisions(out, periods, freq=freq) @@ -1464,7 +1486,7 @@ def abs(self): _raise_if_object_series(self, "abs") meta = self._meta_nonempty.abs() - return self.map_partitions(M.abs, meta=meta) + return self.map_partitions(M.abs, meta=meta, enforce_metadata=False) @derived_from(pd.DataFrame) def all(self, axis=None, skipna=True, split_every=False, out=None): @@ -1543,6 +1565,7 @@ token=self._token_prefix + fn, skipna=skipna, axis=axis, + enforce_metadata=False, ) else: scalar = not is_series_like(meta) @@ -1575,6 +1598,7 @@ token=self._token_prefix + fn, skipna=skipna, axis=axis, + enforce_metadata=False, ) else: scalar = not is_series_like(meta) @@ -1600,7 +1624,9 @@ token = self._token_prefix + "count" if axis == 1: meta = self._meta_nonempty.count(axis=axis) - return self.map_partitions(M.count, meta=meta, token=token, axis=axis) + return self.map_partitions( + M.count, meta=meta, token=token, axis=axis, enforce_metadata=False + ) else: meta = self._meta_nonempty.count() result = self.reduction( @@ -1627,6 +1653,7 @@ token=self._token_prefix + "mean", axis=axis, skipna=skipna, + enforce_metadata=False, ) return handle_out(out, result) else: @@ -1634,7 +1661,14 @@ s = num.sum(skipna=skipna, split_every=split_every) n = num.count(split_every=split_every) name = self._token_prefix + "mean-%s" % tokenize(self, axis, skipna) - result = map_partitions(methods.mean_aggregate, s, n, token=name, meta=meta) + result = map_partitions( + methods.mean_aggregate, + s, + n, + token=name, + meta=meta, + enforce_metadata=False, + ) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) return handle_out(out, result) @@ -1655,6 +1689,7 @@ axis=axis, skipna=skipna, ddof=ddof, + enforce_metadata=False, ) return handle_out(out, result) else: @@ -1800,12 +1835,15 @@ axis=axis, skipna=skipna, ddof=ddof, + enforce_metadata=False, ) return handle_out(out, result) else: v = 
self.var(skipna=skipna, ddof=ddof, split_every=split_every) name = self._token_prefix + "std" - result = map_partitions(np.sqrt, v, meta=meta, token=name) + result = map_partitions( + np.sqrt, v, meta=meta, token=name, enforce_metadata=False + ) return handle_out(out, result) @derived_from(pd.DataFrame) @@ -1828,7 +1866,9 @@ v = num.var(skipna=skipna, ddof=ddof, split_every=split_every) n = num.count(split_every=split_every) name = self._token_prefix + "sem" - result = map_partitions(np.sqrt, v / n, meta=meta, token=name) + result = map_partitions( + np.sqrt, v / n, meta=meta, token=name, enforce_metadata=False + ) if isinstance(self, DataFrame): result.divisions = (min(self.columns), max(self.columns)) @@ -1856,7 +1896,13 @@ # Not supported, the result will have current index as columns raise ValueError("'q' must be scalar when axis=1 is specified") return map_partitions( - M.quantile, self, q, axis, token=keyname, meta=(q, "f8") + M.quantile, + self, + q, + axis, + token=keyname, + enforce_metadata=False, + meta=(q, "f8"), ) else: _raise_if_object_series(self, "quantile") @@ -2136,24 +2182,24 @@ def where(self, cond, other=np.nan): # cond and other may be dask instance, # passing map_partitions via keyword will not be aligned - return map_partitions(M.where, self, cond, other) + return map_partitions(M.where, self, cond, other, enforce_metadata=False) @derived_from(pd.DataFrame) def mask(self, cond, other=np.nan): - return map_partitions(M.mask, self, cond, other) + return map_partitions(M.mask, self, cond, other, enforce_metadata=False) @derived_from(pd.DataFrame) def notnull(self): - return self.map_partitions(M.notnull) + return self.map_partitions(M.notnull, enforce_metadata=False) @derived_from(pd.DataFrame) def isnull(self): - return self.map_partitions(M.isnull) + return self.map_partitions(M.isnull, enforce_metadata=False) @derived_from(pd.DataFrame) def isna(self): if hasattr(pd, "isna"): - return self.map_partitions(M.isna) + return self.map_partitions(M.isna, enforce_metadata=False) else: raise NotImplementedError( "Need more recent version of Pandas " @@ -2174,7 +2220,9 @@ # We wrap values in a delayed for two reasons: # - avoid serializing data in every task # - avoid cost of traversal of large list in optimizations - return self.map_partitions(M.isin, delayed(values), meta=meta) + return self.map_partitions( + M.isin, delayed(values), meta=meta, enforce_metadata=False + ) @derived_from(pd.DataFrame) def astype(self, dtype): @@ -2195,7 +2243,9 @@ meta = clear_known_categories(meta, cols=set_unknown) elif is_categorical_dtype(dtype) and getattr(dtype, "categories", None) is None: meta = clear_known_categories(meta) - return self.map_partitions(M.astype, dtype=dtype, meta=meta) + return self.map_partitions( + M.astype, dtype=dtype, meta=meta, enforce_metadata=False + ) @derived_from(pd.Series) def append(self, other, interleave_partitions=False): @@ -2217,7 +2267,12 @@ M.align, self, other, join, axis=axis, fill_value=fill_value ) aligned = self.map_partitions( - M.align, other, join=join, axis=axis, fill_value=fill_value + M.align, + other, + join=join, + axis=axis, + fill_value=fill_value, + enforce_metadata=False, ) token = tokenize(self, other, join, axis, fill_value) @@ -2602,7 +2657,7 @@ res = self if inplace else self.copy() res.name = index else: - res = self.map_partitions(M.rename, index) + res = self.map_partitions(M.rename, index, enforce_metadata=False) if self.known_divisions: if sorted_index and (callable(index) or is_dict_like(index)): old = 
pd.Series(range(self.npartitions + 1), index=self.divisions) @@ -2704,7 +2759,7 @@ @derived_from(pd.Series, version="0.25.0") def explode(self): meta = self._meta.explode() - return self.map_partitions(M.explode, meta=meta) + return self.map_partitions(M.explode, meta=meta, enforce_metadata=False) def unique(self, split_every=None, split_out=1): """ @@ -2802,7 +2857,7 @@ @derived_from(pd.Series) def dropna(self): - return self.map_partitions(M.dropna) + return self.map_partitions(M.dropna, enforce_metadata=False) @derived_from(pd.Series) def between(self, left, right, inclusive=True): @@ -2815,15 +2870,21 @@ if out is not None: raise ValueError("'out' must be None") # np.clip may pass out - return self.map_partitions(M.clip, lower=lower, upper=upper) + return self.map_partitions( + M.clip, lower=lower, upper=upper, enforce_metadata=False + ) @derived_from(pd.Series) def clip_lower(self, threshold): - return self.map_partitions(M.clip_lower, threshold=threshold) + return self.map_partitions( + M.clip_lower, threshold=threshold, enforce_metadata=False + ) @derived_from(pd.Series) def clip_upper(self, threshold): - return self.map_partitions(M.clip_upper, threshold=threshold) + return self.map_partitions( + M.clip_upper, threshold=threshold, enforce_metadata=False + ) @derived_from(pd.Series) def align(self, other, join="outer", axis=None, fill_value=None): @@ -2992,7 +3053,9 @@ @derived_from(pd.Series) def memory_usage(self, index=True, deep=False): - result = self.map_partitions(M.memory_usage, index=index, deep=deep) + result = self.map_partitions( + M.memory_usage, index=index, deep=deep, enforce_metadata=False + ) return delayed(sum)(result.to_delayed()) def __divmod__(self, other): @@ -3222,6 +3285,14 @@ return _iLocIndexer(self) + def __len__(self): + try: + s = self[self.columns[0]] + except IndexError: + return super().__len__() + else: + return len(s) + def __getitem__(self, key): name = "getitem-%s" % tokenize(self, key) if np.isscalar(key) or isinstance(key, (tuple, str)): @@ -3582,21 +3653,29 @@ @derived_from(pd.DataFrame) def dropna(self, how="any", subset=None, thresh=None): - return self.map_partitions(M.dropna, how=how, subset=subset, thresh=thresh) + return self.map_partitions( + M.dropna, how=how, subset=subset, thresh=thresh, enforce_metadata=False + ) @derived_from(pd.DataFrame) def clip(self, lower=None, upper=None, out=None): if out is not None: raise ValueError("'out' must be None") - return self.map_partitions(M.clip, lower=lower, upper=upper) + return self.map_partitions( + M.clip, lower=lower, upper=upper, enforce_metadata=False + ) @derived_from(pd.DataFrame) def clip_lower(self, threshold): - return self.map_partitions(M.clip_lower, threshold=threshold) + return self.map_partitions( + M.clip_lower, threshold=threshold, enforce_metadata=False + ) @derived_from(pd.DataFrame) def clip_upper(self, threshold): - return self.map_partitions(M.clip_upper, threshold=threshold) + return self.map_partitions( + M.clip_upper, threshold=threshold, enforce_metadata=False + ) @derived_from(pd.DataFrame) def squeeze(self, axis=None): @@ -3623,7 +3702,7 @@ @derived_from(pd.DataFrame, version="0.25.0") def explode(self, column): meta = self._meta.explode(column) - return self.map_partitions(M.explode, column, meta=meta) + return self.map_partitions(M.explode, column, meta=meta, enforce_metadata=False) def to_bag(self, index=False): """Convert to a dask Bag of tuples of each row. 
@@ -3672,7 +3751,7 @@ axis = self._validate_axis(axis) if (axis == 1) or (columns is not None): return self.map_partitions( - M.drop, labels=labels, axis=axis, columns=columns, errors=errors + drop_by_shallow_copy, columns or labels, errors=errors ) raise NotImplementedError( "Drop currently only works for axis=1 or when columns is not None" @@ -3876,11 +3955,18 @@ meta=meta, axis=axis, fill_value=fill_value, + enforce_metadata=False, ) meta = _emulate(op, self, other, axis=axis, fill_value=fill_value) return map_partitions( - op, self, other, meta=meta, axis=axis, fill_value=fill_value + op, + self, + other, + meta=meta, + axis=axis, + fill_value=fill_value, + enforce_metadata=False, ) meth.__doc__ = skip_doctest(op.__doc__) @@ -4742,7 +4828,14 @@ @insert_meta_param_description -def map_partitions(func, *args, **kwargs): +def map_partitions( + func, + *args, + meta=no_default, + enforce_metadata=True, + transform_divisions=True, + **kwargs +): """ Apply Python function on each DataFrame partition. Parameters @@ -4755,11 +4848,13 @@ ``Scalar``, ``Delayed`` or regular python objects. DataFrame-like args (both dask and pandas) will be repartitioned to align (if necessary) before applying the function. + enforce_metadata : bool + Whether or not to enforce the structure of the metadata at runtime. + This will rename and reorder columns for each partition, + and will raise an error if this doesn't work or types don't match. $META """ - meta = kwargs.pop("meta", no_default) name = kwargs.pop("token", None) - transform_divisions = kwargs.pop("transform_divisions", True) assert callable(func) if name is not None: @@ -4813,21 +4908,33 @@ args2.append(arg) kwargs3 = {} + simple = True for k, v in kwargs.items(): v = normalize_arg(v) v, collections = unpack_collections(v) dependencies.extend(collections) kwargs3[k] = v + if collections: + simple = False - dsk = partitionwise_graph( - apply_and_enforce, - name, - *args2, - dependencies=dependencies, - _func=func, - _meta=meta, - **kwargs3 - ) + if enforce_metadata: + dsk = partitionwise_graph( + apply_and_enforce, + name, + *args2, + dependencies=dependencies, + _func=func, + _meta=meta, + **kwargs3 + ) + elif not simple: + dsk = partitionwise_graph( + apply, name, func, *args2, **kwargs3, dependencies=dependencies + ) + else: + dsk = partitionwise_graph( + func, name, *args2, **kwargs, dependencies=dependencies + ) divisions = dfs[0].divisions if transform_divisions and isinstance(dfs[0], Index) and len(dfs) == 1: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/io/parquet/arrow.py new/dask-2.9.0/dask/dataframe/io/parquet/arrow.py --- old/dask-2.8.1/dask/dataframe/io/parquet/arrow.py 2019-11-23 05:12:29.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/io/parquet/arrow.py 2019-12-06 22:32:26.000000000 +0100 @@ -31,7 +31,7 @@ row_group = piece.get_metadata().row_group(rg) for c in range(row_group.num_columns): if not row_group.column(c).statistics: - return [] + return (None, None) row_groups.append(row_group) row_groups_per_piece.append(num_row_groups) if len(row_groups) == len(pieces): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/io/sql.py new/dask-2.9.0/dask/dataframe/io/sql.py --- old/dask-2.8.1/dask/dataframe/io/sql.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/io/sql.py 2019-12-06 22:32:26.000000000 +0100 @@ -195,6 +195,8 @@ ) ) + engine.dispose() + return from_delayed(parts, 
meta, divisions=divisions) @@ -202,8 +204,9 @@ import sqlalchemy as sa engine_kwargs = engine_kwargs or {} - conn = sa.create_engine(uri, **engine_kwargs) - df = pd.read_sql(q, conn, **kwargs) + engine = sa.create_engine(uri, **engine_kwargs) + df = pd.read_sql(q, engine, **kwargs) + engine.dispose() if df.empty: return meta else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/multi.py new/dask-2.9.0/dask/dataframe/multi.py --- old/dask-2.8.1/dask/dataframe/multi.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/multi.py 2019-12-06 22:23:39.000000000 +0100 @@ -862,7 +862,7 @@ def concat_and_check(dfs): if len(set(map(len, dfs))) != 1: raise ValueError("Concatenated DataFrames of different lengths") - return pd.concat(dfs, axis=1) + return methods.concat(dfs, axis=1) def concat_unindexed_dataframes(dfs): @@ -873,7 +873,7 @@ for i in range(dfs[0].npartitions) } - meta = pd.concat([df._meta for df in dfs], axis=1) + meta = methods.concat([df._meta for df in dfs], axis=1) graph = HighLevelGraph.from_collections(name, dsk, dependencies=dfs) return new_dd_object(graph, name, meta, dfs[0].divisions) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/tests/test_dataframe.py new/dask-2.9.0/dask/dataframe/tests/test_dataframe.py --- old/dask-2.8.1/dask/dataframe/tests/test_dataframe.py 2019-11-22 04:35:27.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/tests/test_dataframe.py 2019-12-06 22:32:26.000000000 +0100 @@ -259,6 +259,8 @@ assert ds.name == "z" assert_eq(ds, s) + +def test_rename_series_method_2(): # Series index s = pd.Series(["a", "b", "c", "d", "e", "f", "g"], name="x") ds = dd.from_pandas(s, 2) @@ -1018,6 +1020,8 @@ def test_len(): assert len(d) == len(full) assert len(d.a) == len(full.a) + assert len(dd.from_pandas(pd.DataFrame(), npartitions=1)) == 0 + assert len(dd.from_pandas(pd.DataFrame(columns=[1, 2]), npartitions=1)) == 0 def test_size(): @@ -4111,3 +4115,13 @@ assert s.name == "y" assert ddf.columns == ["x"] assert_eq(ddf, df[["x"]]) + + +def test_simple_map_partitions(): + data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} + df = pd.DataFrame(data) + ddf = dd.from_pandas(df, npartitions=2) + ddf = ddf.clip(-4, 6) + task = ddf.__dask_graph__()[ddf.__dask_keys__()[0]] + [v] = task[0].dsk.values() + assert v[0] == M.clip or v[1] == M.clip diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/tests/test_multi.py new/dask-2.9.0/dask/dataframe/tests/test_multi.py --- old/dask-2.8.1/dask/dataframe/tests/test_multi.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/tests/test_multi.py 2019-12-06 22:23:39.000000000 +0100 @@ -1997,3 +1997,51 @@ dd.multi.warn_dtype_mismatch(df1, df2, "a", "a") assert len(r) == 0 + + [email protected]("engine", ["pandas", "cudf"]) +def test_groupby_concat_cudf(engine): + + # NOTE: Issue #5643 Reproducer + + size = 6 + npartitions = 3 + d1 = pd.DataFrame( + { + "a": np.random.permutation(np.arange(size)), + "b": np.random.randint(100, size=size), + } + ) + d2 = pd.DataFrame( + { + "c": np.random.permutation(np.arange(size)), + "d": np.random.randint(100, size=size), + } + ) + + if engine == "cudf": + # NOTE: engine == "cudf" requires cudf/dask_cudf, + # will be skipped by non-GPU CI. 
+ + cudf = pytest.importorskip("cudf") + dask_cudf = pytest.importorskip("dask_cudf") + + d1 = cudf.from_pandas(d1) + d2 = cudf.from_pandas(d2) + dd1 = dask_cudf.from_cudf(d1, npartitions) + dd2 = dask_cudf.from_cudf(d2, npartitions) + else: + dd1 = dd.from_pandas(d1, npartitions) + dd2 = dd.from_pandas(d2, npartitions) + + grouped_d1 = d1.groupby(["a"]).sum() + grouped_d2 = d2.groupby(["c"]).sum() + res = concat([grouped_d1, grouped_d2], axis=1) + + grouped_dd1 = dd1.groupby(["a"]).sum() + grouped_dd2 = dd2.groupby(["c"]).sum() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + res_dd = dd.concat([grouped_dd1, grouped_dd2], axis=1) + + assert_eq(res_dd.compute().sort_index(), res.sort_index()) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/tseries/resample.py new/dask-2.9.0/dask/dataframe/tseries/resample.py --- old/dask-2.8.1/dask/dataframe/tseries/resample.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/tseries/resample.py 2019-12-06 22:23:39.000000000 +0100 @@ -213,6 +213,10 @@ return self._agg("max") @derived_from(pd_Resampler) + def nunique(self): + return self._agg("nunique") + + @derived_from(pd_Resampler) def ohlc(self): return self._agg("ohlc") @@ -229,9 +233,17 @@ return self._agg("std") @derived_from(pd_Resampler) + def size(self): + return self._agg("size") + + @derived_from(pd_Resampler) def sum(self): return self._agg("sum") @derived_from(pd_Resampler) def var(self): return self._agg("var") + + @derived_from(pd_Resampler) + def quantile(self): + return self._agg("quantile") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/tseries/tests/test_resample.py new/dask-2.9.0/dask/dataframe/tseries/tests/test_resample.py --- old/dask-2.8.1/dask/dataframe/tseries/tests/test_resample.py 2019-06-25 19:26:25.000000000 +0200 +++ new/dask-2.9.0/dask/dataframe/tseries/tests/test_resample.py 2019-12-06 22:23:39.000000000 +0100 @@ -3,7 +3,7 @@ import pandas as pd import pytest -from dask.dataframe.utils import assert_eq +from dask.dataframe.utils import assert_eq, PANDAS_VERSION import dask.dataframe as dd @@ -36,6 +36,7 @@ result = resample(ds, freq, how=method, closed=closed, label=label) expected = resample(ps, freq, how=method, closed=closed, label=label) + assert_eq(result, expected, check_dtype=False) divisions = result.divisions @@ -100,3 +101,18 @@ ddf = dd.from_pandas(df, npartitions=4) assert ddf.resample("D").mean().head().index.name == "date" + + [email protected](PANDAS_VERSION <= "0.23.4", reason="quantile not in 0.23") [email protected]("agg", ["nunique", "mean", "count", "size", "quantile"]) +def test_common_aggs(agg): + index = pd.date_range("2000-01-01", "2000-02-15", freq="h") + ps = pd.Series(range(len(index)), index=index) + ds = dd.from_pandas(ps, npartitions=2) + + f = lambda df: getattr(df, agg)() + + res = f(ps.resample("1d")) + expected = f(ds.resample("1d")) + + assert_eq(res, expected, check_dtype=False) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/dataframe/utils.py new/dask-2.9.0/dask/dataframe/utils.py --- old/dask-2.8.1/dask/dataframe/utils.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/dataframe/utils.py 2019-12-06 22:32:26.000000000 +0100 @@ -952,3 +952,13 @@ return False return True + + +def drop_by_shallow_copy(df, columns, errors="raise"): + """ Use shallow copy to drop columns in 
place + """ + df2 = df.copy(deep=False) + if not pd.api.types.is_list_like(columns): + columns = [columns] + df2.drop(columns=columns, inplace=True, errors=errors) + return df2 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask/sizeof.py new/dask-2.9.0/dask/sizeof.py --- old/dask-2.8.1/dask/sizeof.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.9.0/dask/sizeof.py 2019-12-06 22:32:26.000000000 +0100 @@ -45,6 +45,27 @@ return int(x.nbytes) [email protected]_lazy("numba") +def register_numba(): + import numba.cuda + + @sizeof.register(numba.cuda.cudadrv.devicearray.DeviceNDArray) + def sizeof_numba_devicendarray(x): + return int(x.nbytes) + + [email protected]_lazy("rmm") +def register_rmm(): + import rmm + + # Only included in 0.11.0+ + if hasattr(rmm, "DeviceBuffer"): + + @sizeof.register(rmm.DeviceBuffer) + def sizeof_rmm_devicebuffer(x): + return int(x.nbytes) + + @sizeof.register_lazy("numpy") def register_numpy(): import numpy as np diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/dask.egg-info/PKG-INFO new/dask-2.9.0/dask.egg-info/PKG-INFO --- old/dask-2.8.1/dask.egg-info/PKG-INFO 2019-11-23 05:31:54.000000000 +0100 +++ new/dask-2.9.0/dask.egg-info/PKG-INFO 2019-12-06 22:48:01.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: dask -Version: 2.8.1 +Version: 2.9.0 Summary: Parallel PyData with Task Scheduling Home-page: https://github.com/dask/dask/ Maintainer: Matthew Rocklin diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/docs/source/changelog.rst new/dask-2.9.0/docs/source/changelog.rst --- old/dask-2.8.1/docs/source/changelog.rst 2019-11-23 05:27:27.000000000 +0100 +++ new/dask-2.9.0/docs/source/changelog.rst 2019-12-06 22:45:08.000000000 +0100 @@ -1,6 +1,36 @@ Changelog ========= +2.9.0 / 2019-12-06 +------------------ + +Array ++++++ +- Fix ``da.std`` to work with NumPy arrays (:pr:`5681`) `James Bourbeau`_ + +Core +++++ +- Register ``sizeof`` functions for Numba and RMM (:pr:`5668`) `John A Kirkham`_ +- Update meeting time (:pr:`5682`) `Tom Augspurger`_ + +DataFrame ++++++++++ +- Modify ``dd.DataFrame.drop`` to use shallow copy (:pr:`5675`) `Richard J Zamora`_ +- Fix bug in ``_get_md_row_groups`` (:pr:`5673`) `Richard J Zamora`_ +- Close sqlalchemy engine after querying DB (:pr:`5629`) `Krishan Bhasin`_ +- Allow ``dd.map_partitions`` to not enforce meta (:pr:`5660`) `Matthew Rocklin`_ +- Generalize ``concat_unindexed_dataframes`` to support cudf-backend (:pr:`5659`) `Richard J Zamora`_ +- Add dataframe resample methods (:pr:`5636`) `Ben Zaitlen`_ +- Compute length of dataframe as length of first column (:pr:`5635`) `Matthew Rocklin`_ + +Documentation ++++++++++++++ +- Doc fixup (:pr:`5665`) `James Bourbeau`_ +- Update doc build instructions (:pr:`5640`) `James Bourbeau`_ +- Fix ADL link (:pr:`5639`) `Ray Bell`_ +- Add documentation build (:pr:`5617`) `James Bourbeau`_ + + 2.8.1 / 2019-11-22 ------------------ @@ -2730,3 +2760,4 @@ .. _`Gina Helfrich`: https://github.com/Dr-G .. _`ossdev07`: https://github.com/ossdev07 .. _`Nuno Gomes Silva`: https://github.com/mgsnuno +.. 
_`Ray Bell`: https://github.com/raybellwaves
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/docs/source/scheduling.rst new/dask-2.9.0/docs/source/scheduling.rst
--- old/dask-2.8.1/docs/source/scheduling.rst	2019-06-23 05:15:12.000000000 +0200
+++ new/dask-2.9.0/docs/source/scheduling.rst	2019-12-06 22:32:26.000000000 +0100
@@ -47,7 +47,7 @@
 it incurs no costs to transfer data between tasks.
 However, due to Python's Global Interpreter Lock (GIL), this scheduler
 only provides parallelism when your computation is dominated by non-Python code,
-such as is the case when operating on numeric data in NumPy arrays, Pandas DataFrames,
+as is primarily the case when operating on numeric data in NumPy arrays, Pandas DataFrames,
 or using any of the other C/C++/Cython based projects in the ecosystem.
 
 The threaded scheduler is the default choice for
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/docs/source/setup/cloud.rst new/dask-2.9.0/docs/source/setup/cloud.rst
--- old/dask-2.8.1/docs/source/setup/cloud.rst	2019-11-20 02:10:36.000000000 +0100
+++ new/dask-2.9.0/docs/source/setup/cloud.rst	2019-12-06 22:23:39.000000000 +0100
@@ -26,7 +26,7 @@
 
 - `s3fs <https://s3fs.readthedocs.io/>`_ for Amazon's S3
 - `gcsfs <https://gcsfs.readthedocs.io/>`_ for Google's GCS
-- `adlfs <https://azure-datalake-store.readthedocs.io/>`_ for Microsoft's ADL
+- `adlfs <https://github.com/dask/adlfs/>`_ for Microsoft's ADL
 
 Historical Libraries
 --------------------
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.8.1/docs/source/support.rst new/dask-2.9.0/docs/source/support.rst
--- old/dask-2.8.1/docs/source/support.rst	2019-11-20 02:10:36.000000000 +0100
+++ new/dask-2.9.0/docs/source/support.rst	2019-12-06 22:32:26.000000000 +0100
@@ -22,13 +22,17 @@
    questions and bug reports on gitter and instead ask people to use Stack
    Overflow or GitHub.
 4. **Monthly developer meeting** happens the first Thursday of the month at
-   11:00 US Central Time in `this video meeting <https://zoom.us/j/802251830>`_.
+   10:00 US Central Time in `this video meeting <https://zoom.us/j/802251830>`_.
    Meeting notes are available at
    https://docs.google.com/document/d/1UqNAP87a56ERH_xkQsS5Q_0PKYybd5Lj2WANy_hRzI0/edit
 
+   .. raw:: html
+
+      <iframe src="https://calendar.google.com/calendar/embed?src=4l0vts0c1cgdbq5jhcogj55sfs%40group.calendar.google.com" style="border: 0" width="800" height="600" frameborder="0" scrolling="no"></iframe>
+
 You can subscribe to this calendar to be notified of changes:
 
- * `Google Calendar <https://calendar.google.com/calendar/embed?src=4l0vts0c1cgdbq5jhcogj55sfs%40group.calendar.google.com&ctz=America%2FChicago>`__
+ * `Google Calendar <https://calendar.google.com/calendar/embed?src=4l0vts0c1cgdbq5jhcogj55sfs%40group.calendar.google.com>`__
  * `iCal <https://calendar.google.com/calendar/ical/4l0vts0c1cgdbq5jhcogj55sfs%40group.calendar.google.com/public/basic.ics>`__
 
 .. _`Stack Overflow with the #dask tag`: https://stackoverflow.com/questions/tagged/dask
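
Usage sketches for selected changes:
------------------------------------
For the da.std fix (:pr:`5681`): dask array reductions now accept plain NumPy
arrays, where da.std previously failed because it called the .var() method of
its input (see the reductions.py hunk above). A minimal sketch, assuming
dask >= 2.9.0; the random data is illustrative only:

    import numpy as np
    import dask.array as da

    x = np.random.random(1000)   # a plain NumPy array, not a dask array
    result = da.std(x)           # failed with a plain NumPy input before this fix
    # float() works whether the reduction returns a dask scalar or a NumPy scalar
    print(float(result), float(np.std(x)))  # the two values agree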
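
The sizeof registrations for Numba device arrays and RMM device buffers
(:pr:`5668`) hook into dask's sizeof dispatch. The same mechanism can be
exercised without a GPU by registering a handler for a custom type;
MyDeviceBuffer below is a made-up stand-in, not part of dask:

    from dask.sizeof import sizeof

    class MyDeviceBuffer:
        """Stand-in for an object that reports its size via an nbytes attribute."""
        def __init__(self, nbytes):
            self.nbytes = nbytes

    @sizeof.register(MyDeviceBuffer)
    def sizeof_my_device_buffer(x):
        # Mirrors the new handlers: report the raw byte count of the buffer
        return int(x.nbytes)

    print(sizeof(MyDeviceBuffer(1024)))  # 1024, via the registered handler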
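
dd.DataFrame.drop (:pr:`5675`) now routes column drops through the new
drop_by_shallow_copy helper (see the dask/dataframe/utils.py hunk above),
which avoids deep-copying partition data. The pattern itself is plain pandas;
the toy frame below is illustrative:

    import pandas as pd

    def drop_by_shallow_copy(df, columns, errors="raise"):
        """Drop columns without deep-copying the underlying data."""
        df2 = df.copy(deep=False)  # new frame object; column data is shared, not copied
        if not pd.api.types.is_list_like(columns):
            columns = [columns]
        df2.drop(columns=columns, inplace=True, errors=errors)
        return df2

    pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    print(drop_by_shallow_copy(pdf, "b").columns.tolist())  # ['a']
    print(pdf.columns.tolist())                             # ['a', 'b']; original untouched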
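
The read_sql change (:pr:`5629`) disposes of the SQLAlchemy engine once a
query has been issued, so pooled connections are not left open. A sketch of
the same pattern outside dask; the in-memory SQLite URI and the trivial query
are illustrative:

    import pandas as pd
    import sqlalchemy as sa

    engine = sa.create_engine("sqlite://")                   # illustrative in-memory DB
    df = pd.read_sql(sa.text("SELECT 1 AS answer"), engine)  # run the query
    engine.dispose()                                         # then release pooled connections
    print(df)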
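
dd.map_partitions gained an enforce_metadata keyword (:pr:`5660`). With
enforce_metadata=False the per-partition result is used as-is: columns are not
renamed or reordered to match the declared meta and dtypes are not checked at
runtime, which skips the apply_and_enforce wrapper seen in the core.py hunk.
A usage sketch, assuming dask >= 2.9.0; the frame and the lambda are
illustrative:

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({"x": range(10), "y": range(10)})
    ddf = dd.from_pandas(pdf, npartitions=2)

    # Trust the function's output instead of coercing it to match the meta
    shifted = ddf.map_partitions(lambda part: part + 1, enforce_metadata=False)
    print(shifted.compute())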
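
The new resample aggregations nunique, size and quantile (:pr:`5636`) reuse
the existing _agg machinery in tseries/resample.py. A usage sketch with
made-up hourly data, assuming dask >= 2.9.0 and a pandas version that
supports Resampler.quantile:

    import pandas as pd
    import dask.dataframe as dd

    index = pd.date_range("2000-01-01", "2000-01-15", freq="h")
    ps = pd.Series(range(len(index)), index=index)
    ds = dd.from_pandas(ps, npartitions=2)

    print(ds.resample("1d").size().compute())      # observations per day
    print(ds.resample("1d").nunique().compute())   # distinct values per day
    print(ds.resample("1d").quantile().compute())  # per-day quantile (default q=0.5)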
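
With the length change (:pr:`5635`), len() of a dask DataFrame is computed
from its first column only instead of materializing every column, falling
back to the previous behaviour for a frame with no columns. A small sketch
with toy data, assuming dask >= 2.9.0:

    import pandas as pd
    import dask.dataframe as dd

    pdf = pd.DataFrame({"a": range(1000), "b": range(1000)})
    ddf = dd.from_pandas(pdf, npartitions=4)

    print(len(ddf))  # 1000, computed by counting only column "a"
    print(len(dd.from_pandas(pd.DataFrame(), npartitions=1)))  # 0 for an empty frame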
