Hello community, here is the log from the commit of package python-dask for openSUSE:Factory checked in at 2018-05-29 10:35:04 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-dask (Old) and /work/SRC/openSUSE:Factory/.python-dask.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask" Tue May 29 10:35:04 2018 rev:4 rq:610881 version:0.17.5 Changes: -------- --- /work/SRC/openSUSE:Factory/python-dask/python-dask.changes 2018-05-11 09:18:35.489827487 +0200 +++ /work/SRC/openSUSE:Factory/.python-dask.new/python-dask.changes 2018-05-29 10:35:06.906209160 +0200 @@ -1,0 +2,6 @@ +Mon May 21 03:57:53 UTC 2018 - a...@gmx.de + +- update to version 0.17.5: + * Compatibility with pandas 0.23.0 (:pr:`3499`) Tom Augspurger + +------------------------------------------------------------------- Old: ---- dask-0.17.4.tar.gz New: ---- dask-0.17.5.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-dask.spec ++++++ --- /var/tmp/diff_new_pack.twy0V6/_old 2018-05-29 10:35:07.562184937 +0200 +++ /var/tmp/diff_new_pack.twy0V6/_new 2018-05-29 10:35:07.566184789 +0200 @@ -22,7 +22,7 @@ # python(2/3)-distributed has a dependency loop with python(2/3)-dask %bcond_with test_distributed Name: python-dask -Version: 0.17.4 +Version: 0.17.5 Release: 0 Summary: Minimal task scheduling abstraction License: BSD-3-Clause ++++++ dask-0.17.4.tar.gz -> dask-0.17.5.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/PKG-INFO new/dask-0.17.5/PKG-INFO --- old/dask-0.17.4/PKG-INFO 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/PKG-INFO 2018-05-17 00:48:52.000000000 +0200 @@ -1,12 +1,11 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: dask -Version: 0.17.4 +Version: 0.17.5 Summary: Parallel PyData with Task Scheduling Home-page: http://github.com/dask/dask/ -Author: Matthew Rocklin -Author-email: mrock...@gmail.com +Maintainer: Matthew Rocklin +Maintainer-email: mrock...@gmail.com License: BSD -Description-Content-Type: UNKNOWN Description: Dask ==== @@ -38,3 +37,9 @@ Keywords: task-scheduling parallel numpy pandas pydata Platform: UNKNOWN +Provides-Extra: array +Provides-Extra: delayed +Provides-Extra: complete +Provides-Extra: bag +Provides-Extra: dataframe +Provides-Extra: distributed diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/_version.py new/dask-0.17.5/dask/_version.py --- old/dask-0.17.4/dask/_version.py 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/dask/_version.py 2018-05-17 00:48:52.000000000 +0200 @@ -11,8 +11,8 @@ { "dirty": false, "error": null, - "full-revisionid": "13f07b53be51110dbfd42943cc863220fc50ed7e", - "version": "0.17.4" + "full-revisionid": "b2bed5eb145b39641181140b8f64a9b4427b53ba", + "version": "0.17.5" } ''' # END VERSION_JSON diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/chunk.py new/dask-0.17.5/dask/array/chunk.py --- old/dask-0.17.4/dask/array/chunk.py 2018-05-02 17:34:11.000000000 +0200 +++ new/dask-0.17.5/dask/array/chunk.py 2018-05-17 00:43:27.000000000 +0200 @@ -235,3 +235,14 @@ else: x = np.asfortranarray(x) return x.T.view(dtype).T + + +def einsum(*operands, **kwargs): + subscripts = kwargs.pop('subscripts') + ncontract_inds = kwargs.pop('ncontract_inds') + dtype = kwargs.pop('kernel_dtype') + chunk = np.einsum(subscripts, *operands, dtype=dtype, **kwargs) + + # Avoid concatenate=True in atop by adding 1's + # for the contracted dimensions + return chunk.reshape(chunk.shape + (1,) * ncontract_inds) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/einsumfuncs.py 
new/dask-0.17.5/dask/array/einsumfuncs.py --- old/dask-0.17.4/dask/array/einsumfuncs.py 2018-05-02 17:34:11.000000000 +0200 +++ new/dask-0.17.5/dask/array/einsumfuncs.py 2018-05-17 00:43:27.000000000 +0200 @@ -7,6 +7,7 @@ from numpy.compat import basestring from .core import (atop, asarray) +from . import chunk einsum_symbols = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' einsum_symbols_set = set(einsum_symbols) @@ -182,24 +183,20 @@ return (input_subscripts, output_subscript, operands) -def _einsum_kernel(*operands, **kwargs): - subscripts = kwargs.pop('subscripts') - ncontract_inds = kwargs.pop('ncontract_inds') - dtype = kwargs.pop('kernel_dtype') - chunk = np.einsum(subscripts, *operands, dtype=dtype, **kwargs) - - # Avoid concatenate=True in atop by adding 1's - # for the contracted dimensions - return chunk.reshape(chunk.shape + (1,) * ncontract_inds) - - einsum_can_optimize = LooseVersion(np.__version__) >= LooseVersion("1.12.0") @wraps(np.einsum) def einsum(*operands, **kwargs): - dtype = kwargs.get('dtype') - optimize = kwargs.get('optimize') + casting = kwargs.pop('casting', 'safe') + dtype = kwargs.pop('dtype', None) + optimize = kwargs.pop('optimize', False) + order = kwargs.pop('order', 'K') + split_every = kwargs.pop('split_every', None) + if kwargs: + raise TypeError("einsum() got unexpected keyword " + "argument(s) %s" % ",".join(kwargs)) + einsum_dtype = dtype inputs, outputs, ops = parse_einsum_input(operands) @@ -209,16 +206,18 @@ if dtype is None: dtype = np.result_type(*[o.dtype for o in ops]) - if optimize is None: - optimize = False - - if einsum_can_optimize and optimize is not False: - # Avoid computation of dask arrays within np.einsum_path - # by passing in small numpy arrays broadcasted - # up to the right shape - fake_ops = [np.broadcast_to(o.dtype.type(0), shape=o.shape) - for o in ops] - optimize, _ = np.einsum_path(subscripts, *fake_ops, optimize=optimize) + if einsum_can_optimize: + if optimize is not False: + # Avoid computation of dask arrays within np.einsum_path + # by passing in small numpy arrays broadcasted + # up to the right shape + fake_ops = [np.broadcast_to(o.dtype.type(0), shape=o.shape) + for o in ops] + optimize, _ = np.einsum_path(subscripts, *fake_ops, + optimize=optimize) + kwargs = {'optimize': optimize} + else: + kwargs = {} inputs = [tuple(i) for i in inputs.split(",")] @@ -229,27 +228,21 @@ contract_inds = all_inds - set(outputs) ncontract_inds = len(contract_inds) - # Update kwargs with np.einsum parameters - kwargs['subscripts'] = subscripts - kwargs['kernel_dtype'] = einsum_dtype - kwargs['ncontract_inds'] = ncontract_inds - - if einsum_can_optimize: - kwargs['optimize'] = optimize - - # Update kwargs with atop parameters - kwargs['adjust_chunks'] = {ind: 1 for ind in contract_inds} - kwargs['dtype'] = dtype - # Introduce the contracted indices into the atop product # so that we get numpy arrays, not lists - result = atop(_einsum_kernel, tuple(outputs) + tuple(contract_inds), + result = atop(chunk.einsum, tuple(outputs) + tuple(contract_inds), *(a for ap in zip(ops, inputs) for a in ap), - **kwargs) + # atop parameters + adjust_chunks={ind: 1 for ind in contract_inds}, dtype=dtype, + # np.einsum parameters + subscripts=subscripts, kernel_dtype=einsum_dtype, + ncontract_inds=ncontract_inds, order=order, + casting=casting, **kwargs) # Now reduce over any extra contraction dimensions if ncontract_inds > 0: size = len(outputs) - return result.sum(axis=list(range(size, size + ncontract_inds))) + return 
result.sum(axis=list(range(size, size + ncontract_inds)), + split_every=split_every) return result diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/rechunk.py new/dask-0.17.5/dask/array/rechunk.py --- old/dask-0.17.4/dask/array/rechunk.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/array/rechunk.py 2018-05-17 00:43:27.000000000 +0200 @@ -191,12 +191,16 @@ >>> blockshape_dict_to_tuple(((4, 4), (5, 5)), {1: 3}) ((4, 4), (3, 3, 3, 1)) + >>> blockshape_dict_to_tuple(((4, 4), (5, 5)), {1: -1}) + ((4, 4), (10,)) + """ shape = tuple(map(sum, old_chunks)) new_chunks = list(old_chunks) for k, v in d.items(): - div = shape[k] // v - mod = shape[k] % v + if v == -1: + v = shape[k] + div, mod = divmod(shape[k], v) new_chunks[k] = (v,) * div + ((mod,) if mod else ()) return tuple(new_chunks) @@ -227,15 +231,17 @@ Parameters ---------- - x: dask array - chunks: tuple - The new block dimensions to create + x: dask array + Array to be rechunked. + chunks: int, tuple or dict + The new block dimensions to create. -1 indicates the full size of the + corresponding dimension. threshold: int - The graph growth factor under which we don't bother - introducing an intermediate step + The graph growth factor under which we don't bother introducing an + intermediate step. block_size_limit: int The maximum block size (in bytes) we want to produce during an - intermediate step + intermediate step. """ threshold = threshold or DEFAULT_THRESHOLD block_size_limit = block_size_limit or DEFAULT_BLOCK_SIZE_LIMIT diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/slicing.py new/dask-0.17.5/dask/array/slicing.py --- old/dask-0.17.4/dask/array/slicing.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/array/slicing.py 2018-05-17 00:43:27.000000000 +0200 @@ -3,10 +3,10 @@ from itertools import product import math from numbers import Integral, Number -from operator import add, getitem, itemgetter +from operator import getitem, itemgetter import numpy as np -from toolz import accumulate, memoize, merge, pluck, concat +from toolz import memoize, merge, pluck, concat from .. import core from .. import sharedict @@ -372,13 +372,16 @@ >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ + chunk_boundaries = np.cumsum(lengths) + if isinstance(index, Integral): - i = 0 - ind = index - lens = list(lengths) - while ind >= lens[0]: - i += 1 - ind -= lens.pop(0) + # use right-side search to be consistent with previous result + i = chunk_boundaries.searchsorted(index, side='right') + if i > 0: + # the very first chunk has no relative shift + ind = index - chunk_boundaries[i - 1] + else: + ind = index return {i: ind} assert isinstance(index, slice) @@ -391,7 +394,7 @@ start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: - start = index.start or dim_shape - 1 + start = index.start if index.start is not None else dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop @@ -403,7 +406,19 @@ d = dict() if step > 0: - for i, length in enumerate(lengths): + istart = chunk_boundaries.searchsorted(start, side='right') + istop = chunk_boundaries.searchsorted(stop, side='left') + + # the bound is not exactly tight; make it tighter? 
+ istop = min(istop + 1, len(lengths)) + + # jump directly to istart + if istart > 0: + start = start - chunk_boundaries[istart - 1] + stop = stop - chunk_boundaries[istart - 1] + + for i in range(istart, istop): + length = lengths[i] if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step @@ -412,8 +427,16 @@ stop -= length else: rstart = start # running start - chunk_boundaries = list(accumulate(add, lengths)) - for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))): + + istart = chunk_boundaries.searchsorted(start, side='left') + istop = chunk_boundaries.searchsorted(stop, side='right') + + # the bound is not exactly tight; make it tighter? + istart = min(istart + 1, len(chunk_boundaries) - 1) + istop = max(istop - 1, -1) + + for i in range(istart, istop, -1): + chunk_stop = chunk_boundaries[i] # create a chunk start and stop if i == 0: chunk_start = 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/tests/test_rechunk.py new/dask-0.17.5/dask/array/tests/test_rechunk.py --- old/dask-0.17.4/dask/array/tests/test_rechunk.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/array/tests/test_rechunk.py 2018-05-17 00:43:27.000000000 +0200 @@ -157,6 +157,10 @@ y = x.rechunk(chunks={0: (12, 12)}) assert y.chunks == ((12, 12), (8, 8, 8)) + x = da.ones((24, 24), chunks=(4, 8)) + y = x.rechunk(chunks={0: -1}) + assert y.chunks == ((24,), (8, 8, 8)) + def test_rechunk_with_empty_input(): x = da.ones((24, 24), chunks=(4, 8)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/tests/test_routines.py new/dask-0.17.5/dask/array/tests/test_routines.py --- old/dask-0.17.4/dask/array/tests/test_routines.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/array/tests/test_routines.py 2018-05-17 00:43:27.000000000 +0200 @@ -1407,6 +1407,19 @@ da.einsum(sig, *np_inputs, casting=casting)) +@pytest.mark.parametrize('split_every', [None, 2]) +def test_einsum_split_every(split_every): + np_inputs, da_inputs = _numpy_and_dask_inputs('a') + assert_eq(np.einsum('a', *np_inputs), + da.einsum('a', *da_inputs, split_every=split_every)) + + +def test_einsum_invalid_args(): + _, da_inputs = _numpy_and_dask_inputs('a') + with pytest.raises(TypeError): + da.einsum('a', *da_inputs, foo=1, bar=2) + + def test_einsum_broadcasting_contraction(): a = np.random.rand(1, 5, 4) b = np.random.rand(4, 6) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/core.py new/dask-0.17.5/dask/dataframe/core.py --- old/dask-0.17.4/dask/dataframe/core.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/core.py 2018-05-17 00:43:27.000000000 +0200 @@ -38,7 +38,8 @@ from .optimize import optimize from .utils import (meta_nonempty, make_meta, insert_meta_param_description, raise_on_meta_error, clear_known_categories, - is_categorical_dtype, has_known_categories, PANDAS_VERSION) + is_categorical_dtype, has_known_categories, PANDAS_VERSION, + index_summary) no_default = '__no_default__' @@ -2780,7 +2781,8 @@ bind_method(cls, name, meth) @insert_meta_param_description(pad=12) - def apply(self, func, axis=0, args=(), meta=no_default, **kwds): + def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, + args=(), meta=no_default, **kwds): """ Parallel version of pandas.DataFrame.apply This mimics the pandas version except for the 
following: @@ -2842,6 +2844,17 @@ """ axis = self._validate_axis(axis) + pandas_kwargs = { + 'axis': axis, + 'broadcast': broadcast, + 'raw': raw, + 'reduce': None, + } + + if PANDAS_VERSION >= '0.23.0': + kwds.setdefault('result_type', None) + + kwds.update(pandas_kwargs) if axis == 0: msg = ("dd.DataFrame.apply only supports axis=1\n" @@ -2857,10 +2870,9 @@ warnings.warn(msg) meta = _emulate(M.apply, self._meta_nonempty, func, - axis=axis, args=args, udf=True, **kwds) + args=args, udf=True, **kwds) - return map_partitions(M.apply, self, func, axis, - False, False, None, args, meta=meta, **kwds) + return map_partitions(M.apply, self, func, args=args, meta=meta, **kwds) @derived_from(pd.DataFrame) def applymap(self, func, meta='__no_default__'): @@ -2909,7 +2921,7 @@ if verbose: index = computations['index'] counts = computations['count'] - lines.append(index.summary()) + lines.append(index_summary(index)) lines.append('Data columns (total {} columns):'.format(len(self.columns))) if PANDAS_VERSION >= '0.20.0': @@ -2921,7 +2933,7 @@ column_info = [column_template.format(pprint_thing(x[0]), x[1], x[2]) for x in zip(self.columns, counts, self.dtypes)] else: - column_info = [self.columns.summary(name='Columns')] + column_info = [index_summary(self.columns, name='Columns')] lines.extend(column_info) dtype_counts = ['%s(%d)' % k for k in sorted(self.dtypes.value_counts().iteritems(), key=str)] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/rolling.py new/dask-0.17.5/dask/dataframe/rolling.py --- old/dask-0.17.4/dask/dataframe/rolling.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/rolling.py 2018-05-17 00:43:27.000000000 +0200 @@ -8,7 +8,7 @@ from ..base import tokenize from ..utils import M, funcname, derived_from from .core import _emulate -from .utils import make_meta +from .utils import make_meta, PANDAS_VERSION def overlap_chunk(func, prev_part, current_part, next_part, before, after, @@ -292,8 +292,19 @@ return self._call_method('quantile', quantile) @derived_from(pd_Rolling) - def apply(self, func, args=(), kwargs={}): - return self._call_method('apply', func, args=args, kwargs=kwargs) + def apply(self, func, args=(), kwargs={}, **kwds): + # TODO: In a future version of pandas this will change to + # raw=False. Think about inspecting the function signature and setting + # to that? + if PANDAS_VERSION >= '0.23.0': + kwds.setdefault("raw", None) + else: + if kwargs: + msg = ("Invalid argument to 'apply'. Keyword arguments " + "should be given as a dict to the 'kwargs' argument. 
") + raise TypeError(msg) + return self._call_method('apply', func, args=args, + kwargs=kwargs, **kwds) def __repr__(self): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_categorical.py new/dask-0.17.5/dask/dataframe/tests/test_categorical.py --- old/dask-0.17.4/dask/dataframe/tests/test_categorical.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/tests/test_categorical.py 2018-05-17 00:43:27.000000000 +0200 @@ -119,12 +119,14 @@ def test_categorize(): - meta = clear_known_categories(frames4[0]) + # rename y to y_ to avoid pandas future warning about ambiguous + # levels + meta = clear_known_categories(frames4[0]).rename(columns={'y': 'y_'}) ddf = dd.DataFrame({('unknown', i): df for (i, df) in enumerate(frames3)}, - 'unknown', meta, [None] * 4) + 'unknown', meta, [None] * 4).rename(columns={'y': 'y_'}) ddf = ddf.assign(w=ddf.w.cat.set_categories(['x', 'y', 'z'])) assert ddf.w.cat.known - assert not ddf.y.cat.known + assert not ddf.y_.cat.known assert not ddf.index.cat.known df = ddf.compute() @@ -132,27 +134,27 @@ known_index = index is not False # By default categorize object and unknown cat columns ddf2 = ddf.categorize(index=index) - assert ddf2.y.cat.known + assert ddf2.y_.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) # Specifying split_every works ddf2 = ddf.categorize(index=index, split_every=2) - assert ddf2.y.cat.known + assert ddf2.y_.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) # Specifying one column doesn't affect others ddf2 = ddf.categorize('v', index=index) - assert not ddf2.y.cat.known + assert not ddf2.y_.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) - ddf2 = ddf.categorize('y', index=index) - assert ddf2.y.cat.known + ddf2 = ddf.categorize('y_', index=index) + assert ddf2.y_.cat.known assert ddf2.v.dtype == 'object' assert ddf2.index.cat.known == known_index assert_eq(ddf2, df) @@ -188,7 +190,7 @@ assert ddf.categorize(index=False) is ddf # Non-object dtype - ddf = dd.from_pandas(df.set_index(df.A), npartitions=5) + ddf = dd.from_pandas(df.set_index(df.A.rename('idx')), npartitions=5) df = ddf.compute() ddf2 = ddf.categorize(index=True) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_dataframe.py new/dask-0.17.5/dask/dataframe/tests/test_dataframe.py --- old/dask-0.17.4/dask/dataframe/tests/test_dataframe.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/tests/test_dataframe.py 2018-05-17 00:43:27.000000000 +0200 @@ -1,4 +1,5 @@ import sys +import textwrap from distutils.version import LooseVersion from itertools import product from operator import add @@ -1612,12 +1613,19 @@ # count dtypes tm.assert_series_equal(a.get_dtype_counts(), df.get_dtype_counts()) - tm.assert_series_equal(a.get_ftype_counts(), df.get_ftype_counts()) tm.assert_series_equal(result.get_dtype_counts(), expected.get_dtype_counts()) - tm.assert_series_equal(result.get_ftype_counts(), - expected.get_ftype_counts()) + + if PANDAS_VERSION >= '0.23.0': + ctx = pytest.warns(FutureWarning) + else: + ctx = pytest.warns(None) + + with ctx: + tm.assert_series_equal(a.get_ftype_counts(), 
df.get_ftype_counts()) + tm.assert_series_equal(result.get_ftype_counts(), + expected.get_ftype_counts()) def test_deterministic_apply_concat_apply_names(): @@ -2097,7 +2105,7 @@ def test_cov_corr_mixed(): size = 1000 - d = {'dates' : pd.date_range('2015-01-01', periods=size, frequency='1T'), + d = {'dates' : pd.date_range('2015-01-01', periods=size, freq='1T'), 'unique_id' : np.arange(0, size), 'ints' : np.random.randint(0, size, size=size), 'floats' : np.random.randn(size), @@ -2415,9 +2423,11 @@ buf = StringIO() g.info(buf, verbose=False) - assert buf.getvalue() == unicode("""<class 'dask.dataframe.core.DataFrame'> -Columns: 2 entries, (C, count) to (C, sum) -dtypes: int64(2)""") + expected = unicode(textwrap.dedent("""\ + <class 'dask.dataframe.core.DataFrame'> + Columns: 2 entries, ('C', 'count') to ('C', 'sum') + dtypes: int64(2)""")) + assert buf.getvalue() == expected def test_categorize_info(): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_indexing.py new/dask-0.17.5/dask/dataframe/tests/test_indexing.py --- old/dask-0.17.4/dask/dataframe/tests/test_indexing.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/tests/test_indexing.py 2018-05-17 00:43:27.000000000 +0200 @@ -8,7 +8,7 @@ import dask.dataframe as dd from dask.dataframe.indexing import _coerce_loc_index -from dask.dataframe.utils import assert_eq, make_meta +from dask.dataframe.utils import assert_eq, make_meta, PANDAS_VERSION dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, @@ -33,18 +33,30 @@ assert_eq(d.loc[:8], full.loc[:8]) assert_eq(d.loc[3:], full.loc[3:]) assert_eq(d.loc[[5]], full.loc[[5]]) - assert_eq(d.loc[[3, 4, 1, 8]], full.loc[[3, 4, 1, 8]]) - assert_eq(d.loc[[3, 4, 1, 9]], full.loc[[3, 4, 1, 9]]) - assert_eq(d.loc[np.array([3, 4, 1, 9])], full.loc[np.array([3, 4, 1, 9])]) + + if PANDAS_VERSION >= '0.23.0': + expected_warning = FutureWarning + else: + expected_warning = None + + with pytest.warns(expected_warning): + assert_eq(d.loc[[3, 4, 1, 8]], full.loc[[3, 4, 1, 8]]) + with pytest.warns(expected_warning): + assert_eq(d.loc[[3, 4, 1, 9]], full.loc[[3, 4, 1, 9]]) + with pytest.warns(expected_warning): + assert_eq(d.loc[np.array([3, 4, 1, 9])], full.loc[np.array([3, 4, 1, 9])]) assert_eq(d.a.loc[5], full.a.loc[5:5]) assert_eq(d.a.loc[3:8], full.a.loc[3:8]) assert_eq(d.a.loc[:8], full.a.loc[:8]) assert_eq(d.a.loc[3:], full.a.loc[3:]) assert_eq(d.a.loc[[5]], full.a.loc[[5]]) - assert_eq(d.a.loc[[3, 4, 1, 8]], full.a.loc[[3, 4, 1, 8]]) - assert_eq(d.a.loc[[3, 4, 1, 9]], full.a.loc[[3, 4, 1, 9]]) - assert_eq(d.a.loc[np.array([3, 4, 1, 9])], full.a.loc[np.array([3, 4, 1, 9])]) + with pytest.warns(expected_warning): + assert_eq(d.a.loc[[3, 4, 1, 8]], full.a.loc[[3, 4, 1, 8]]) + with pytest.warns(expected_warning): + assert_eq(d.a.loc[[3, 4, 1, 9]], full.a.loc[[3, 4, 1, 9]]) + with pytest.warns(expected_warning): + assert_eq(d.a.loc[np.array([3, 4, 1, 9])], full.a.loc[np.array([3, 4, 1, 9])]) assert_eq(d.a.loc[[]], full.a.loc[[]]) assert_eq(d.a.loc[np.array([])], full.a.loc[np.array([])]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_rolling.py new/dask-0.17.5/dask/dataframe/tests/test_rolling.py --- old/dask-0.17.4/dask/dataframe/tests/test_rolling.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/tests/test_rolling.py 2018-05-17 00:43:27.000000000 +0200 @@ -3,7 +3,7 @@ import numpy as 
np import dask.dataframe as dd -from dask.dataframe.utils import assert_eq +from dask.dataframe.utils import assert_eq, PANDAS_VERSION N = 40 df = pd.DataFrame({'a': np.random.randn(N).cumsum(), @@ -122,18 +122,28 @@ # DataFrame prolling = df.rolling(window, center=center) drolling = ddf.rolling(window, center=center) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + if method == 'apply' and PANDAS_VERSION >= '0.23.0': + kwargs = {'raw': False} + else: + kwargs = {} + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) # Series prolling = df.a.rolling(window, center=center) drolling = ddf.a.rolling(window, center=center) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) +@pytest.mark.skipif(PANDAS_VERSION >= '0.23.0', reason="Raw is allowed.") +def test_rolling_raw_pandas_lt_0230_raises(): + with pytest.raises(TypeError): + df.rolling(2).apply(mad, raw=True) + + def test_rolling_raises(): df = pd.DataFrame({'a': np.random.randn(25).cumsum(), 'b': np.random.randint(100, size=(25,))}) @@ -209,17 +219,21 @@ @pytest.mark.parametrize('window', ['1S', '2S', '3S', pd.offsets.Second(5)]) def test_time_rolling_methods(method, args, window, check_less_precise): # DataFrame + if method == 'apply' and PANDAS_VERSION >= '0.23.0': + kwargs = {"raw": False} + else: + kwargs = {} prolling = ts.rolling(window) drolling = dts.rolling(window) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) # Series prolling = ts.a.rolling(window) drolling = dts.a.rolling(window) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_ufunc.py new/dask-0.17.5/dask/dataframe/tests/test_ufunc.py --- old/dask-0.17.4/dask/dataframe/tests/test_ufunc.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/tests/test_ufunc.py 2018-05-17 00:43:27.000000000 +0200 @@ -345,17 +345,17 @@ assert isinstance(dafunc(dask, darray), dask_type) assert isinstance(dafunc(darray, dask), dask_type) - tm.assert_numpy_array_equal(dafunc(dask, darray).compute().as_matrix(), - npfunc(pandas.as_matrix(), darray).compute()) + tm.assert_numpy_array_equal(dafunc(dask, darray).compute().values, + npfunc(pandas.values, darray).compute()) # applying NumPy ufunc is lazy assert isinstance(npfunc(dask, darray), dask_type) assert isinstance(npfunc(darray, dask), dask_type) - tm.assert_numpy_array_equal(npfunc(dask, darray).compute().as_matrix(), - npfunc(pandas.as_matrix(), darray.compute())) - tm.assert_numpy_array_equal(npfunc(darray, dask).compute().as_matrix(), - npfunc(darray.compute(), pandas.as_matrix())) + tm.assert_numpy_array_equal(npfunc(dask, darray).compute().values, + npfunc(pandas.values, darray.compute())) + tm.assert_numpy_array_equal(npfunc(darray, dask).compute().values, + npfunc(darray.compute(), pandas.values)) @pytest.mark.parametrize('redfunc', ['sum', 'prod', 'min', 
'max', 'mean']) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/utils.py new/dask-0.17.5/dask/dataframe/utils.py --- old/dask-0.17.4/dask/dataframe/utils.py 2018-04-30 23:52:38.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/utils.py 2018-05-17 00:43:27.000000000 +0200 @@ -498,6 +498,22 @@ errmsg)) +def index_summary(idx, name=None): + """Summarized representation of an Index. + """ + n = len(idx) + if name is None: + name = idx.__class__.__name__ + if n: + head = idx[0] + tail = idx[-1] + summary = ', {} to {}'.format(head, tail) + else: + summary = '' + + return "{}: {} entries{}".format(name, n, summary) + + ############################################################### # Testing ############################################################### diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask.egg-info/PKG-INFO new/dask-0.17.5/dask.egg-info/PKG-INFO --- old/dask-0.17.4/dask.egg-info/PKG-INFO 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/dask.egg-info/PKG-INFO 2018-05-17 00:48:52.000000000 +0200 @@ -1,12 +1,11 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: dask -Version: 0.17.4 +Version: 0.17.5 Summary: Parallel PyData with Task Scheduling Home-page: http://github.com/dask/dask/ -Author: Matthew Rocklin -Author-email: mrock...@gmail.com +Maintainer: Matthew Rocklin +Maintainer-email: mrock...@gmail.com License: BSD -Description-Content-Type: UNKNOWN Description: Dask ==== @@ -38,3 +37,9 @@ Keywords: task-scheduling parallel numpy pandas pydata Platform: UNKNOWN +Provides-Extra: array +Provides-Extra: delayed +Provides-Extra: complete +Provides-Extra: bag +Provides-Extra: dataframe +Provides-Extra: distributed diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask.egg-info/SOURCES.txt new/dask-0.17.5/dask.egg-info/SOURCES.txt --- old/dask-0.17.4/dask.egg-info/SOURCES.txt 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/dask.egg-info/SOURCES.txt 2018-05-17 00:48:52.000000000 +0200 @@ -190,6 +190,7 @@ dask/tests/test_threaded.py dask/tests/test_utils.py docs/Makefile +docs/README.rst docs/make.bat docs/source/api.rst docs/source/array-api.rst diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/docs/README.rst new/dask-0.17.5/docs/README.rst --- old/dask-0.17.4/docs/README.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/dask-0.17.5/docs/README.rst 2017-04-24 17:38:09.000000000 +0200 @@ -0,0 +1,10 @@ +To build a local copy of the dask docs, install the programs in +requirements-docs.txt and run 'make html'. 
If you use the conda package manager +these commands suffice:: + + git clone g...@github.com:dask/dask.git + cd dask/docs + conda create -n daskdocs --file requirements-docs.txt + source activate daskdocs + make html + open build/html/index.html diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/docs/source/changelog.rst new/dask-0.17.5/docs/source/changelog.rst --- old/dask-0.17.4/docs/source/changelog.rst 2018-05-03 23:43:54.000000000 +0200 +++ new/dask-0.17.5/docs/source/changelog.rst 2018-05-17 00:43:27.000000000 +0200 @@ -1,27 +1,21 @@ Changelog ========= - -0.18.0 / 2018-MM-DD +0.17.5 / 2018-05-16 ------------------- Array +++++ -- +- Fix ``rechunk`` with chunksize of -1 in a dict (:pr:`3469`) `Stephan Hoyer`_ +- ``einsum`` now accepts the ``split_every`` parameter (:pr:`3396`) `Guido Imperiale`_ +- Improved slicing performance (:pr:`3469`) `Yu Feng`_ -Dataframe +DataFrame +++++++++ -Bag -+++ - -- - -Core -++++ +- Compatibility with pandas 0.23.0 (:pr:`3499`) `Tom Augspurger`_ -- 0.17.4 / 2018-05-03 -------------------