Hello community, here is the log from the commit of package python-dask for openSUSE:Factory checked in at 2018-05-29 10:35:04 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-dask (Old) and /work/SRC/openSUSE:Factory/.python-dask.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask" Tue May 29 10:35:04 2018 rev:4 rq:610881 version:0.17.5 Changes: -------- --- /work/SRC/openSUSE:Factory/python-dask/python-dask.changes 2018-05-11 09:18:35.489827487 +0200 +++ /work/SRC/openSUSE:Factory/.python-dask.new/python-dask.changes 2018-05-29 10:35:06.906209160 +0200 @@ -1,0 +2,6 @@ +Mon May 21 03:57:53 UTC 2018 - a...@gmx.de + +- update to version 0.17.5: + * Compatibility with pandas 0.23.0 (:pr:`3499`) Tom Augspurger + +------------------------------------------------------------------- Old: ---- dask-0.17.4.tar.gz New: ---- dask-0.17.5.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-dask.spec ++++++ --- /var/tmp/diff_new_pack.twy0V6/_old 2018-05-29 10:35:07.562184937 +0200 +++ /var/tmp/diff_new_pack.twy0V6/_new 2018-05-29 10:35:07.566184789 +0200 @@ -22,7 +22,7 @@ # python(2/3)-distributed has a dependency loop with python(2/3)-dask %bcond_with test_distributed Name: python-dask -Version: 0.17.4 +Version: 0.17.5 Release: 0 Summary: Minimal task scheduling abstraction License: BSD-3-Clause ++++++ dask-0.17.4.tar.gz -> dask-0.17.5.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/PKG-INFO new/dask-0.17.5/PKG-INFO --- old/dask-0.17.4/PKG-INFO 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/PKG-INFO 2018-05-17 00:48:52.000000000 +0200 @@ -1,12 +1,11 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: dask -Version: 0.17.4 +Version: 0.17.5 Summary: Parallel PyData with Task Scheduling Home-page: http://github.com/dask/dask/ -Author: Matthew Rocklin -Author-email: mrock...@gmail.com +Maintainer: Matthew Rocklin +Maintainer-email: mrock...@gmail.com License: BSD -Description-Content-Type: UNKNOWN Description: Dask ==== @@ -38,3 +37,9 @@ Keywords: task-scheduling parallel numpy pandas pydata Platform: UNKNOWN +Provides-Extra: array +Provides-Extra: delayed +Provides-Extra: complete +Provides-Extra: bag +Provides-Extra: dataframe +Provides-Extra: distributed diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/_version.py new/dask-0.17.5/dask/_version.py --- old/dask-0.17.4/dask/_version.py 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/dask/_version.py 2018-05-17 00:48:52.000000000 +0200 @@ -11,8 +11,8 @@ { "dirty": false, "error": null, - "full-revisionid": "13f07b53be51110dbfd42943cc863220fc50ed7e", - "version": "0.17.4" + "full-revisionid": "b2bed5eb145b39641181140b8f64a9b4427b53ba", + "version": "0.17.5" } ''' # END VERSION_JSON diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/chunk.py new/dask-0.17.5/dask/array/chunk.py --- old/dask-0.17.4/dask/array/chunk.py 2018-05-02 17:34:11.000000000 +0200 +++ new/dask-0.17.5/dask/array/chunk.py 2018-05-17 00:43:27.000000000 +0200 @@ -235,3 +235,14 @@ else: x = np.asfortranarray(x) return x.T.view(dtype).T + + +def einsum(*operands, **kwargs): + subscripts = kwargs.pop('subscripts') + ncontract_inds = kwargs.pop('ncontract_inds') + dtype = kwargs.pop('kernel_dtype') + chunk = np.einsum(subscripts, *operands, dtype=dtype, **kwargs) + + # Avoid concatenate=True in atop by adding 1's + # for the contracted dimensions + return chunk.reshape(chunk.shape + (1,) * ncontract_inds) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/einsumfuncs.py 
new/dask-0.17.5/dask/array/einsumfuncs.py --- old/dask-0.17.4/dask/array/einsumfuncs.py 2018-05-02 17:34:11.000000000 +0200 +++ new/dask-0.17.5/dask/array/einsumfuncs.py 2018-05-17 00:43:27.000000000 +0200 @@ -7,6 +7,7 @@ from numpy.compat import basestring from .core import (atop, asarray) +from . import chunk einsum_symbols = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' einsum_symbols_set = set(einsum_symbols) @@ -182,24 +183,20 @@ return (input_subscripts, output_subscript, operands) -def _einsum_kernel(*operands, **kwargs): - subscripts = kwargs.pop('subscripts') - ncontract_inds = kwargs.pop('ncontract_inds') - dtype = kwargs.pop('kernel_dtype') - chunk = np.einsum(subscripts, *operands, dtype=dtype, **kwargs) - - # Avoid concatenate=True in atop by adding 1's - # for the contracted dimensions - return chunk.reshape(chunk.shape + (1,) * ncontract_inds) - - einsum_can_optimize = LooseVersion(np.__version__) >= LooseVersion("1.12.0") @wraps(np.einsum) def einsum(*operands, **kwargs): - dtype = kwargs.get('dtype') - optimize = kwargs.get('optimize') + casting = kwargs.pop('casting', 'safe') + dtype = kwargs.pop('dtype', None) + optimize = kwargs.pop('optimize', False) + order = kwargs.pop('order', 'K') + split_every = kwargs.pop('split_every', None) + if kwargs: + raise TypeError("einsum() got unexpected keyword " + "argument(s) %s" % ",".join(kwargs)) + einsum_dtype = dtype inputs, outputs, ops = parse_einsum_input(operands) @@ -209,16 +206,18 @@ if dtype is None: dtype = np.result_type(*[o.dtype for o in ops]) - if optimize is None: - optimize = False - - if einsum_can_optimize and optimize is not False: - # Avoid computation of dask arrays within np.einsum_path - # by passing in small numpy arrays broadcasted - # up to the right shape - fake_ops = [np.broadcast_to(o.dtype.type(0), shape=o.shape) - for o in ops] - optimize, _ = np.einsum_path(subscripts, *fake_ops, optimize=optimize) + if einsum_can_optimize: + if optimize is not False: + # Avoid computation of dask arrays within np.einsum_path + # by passing in small numpy arrays broadcasted + # up to the right shape + fake_ops = [np.broadcast_to(o.dtype.type(0), shape=o.shape) + for o in ops] + optimize, _ = np.einsum_path(subscripts, *fake_ops, + optimize=optimize) + kwargs = {'optimize': optimize} + else: + kwargs = {} inputs = [tuple(i) for i in inputs.split(",")] @@ -229,27 +228,21 @@ contract_inds = all_inds - set(outputs) ncontract_inds = len(contract_inds) - # Update kwargs with np.einsum parameters - kwargs['subscripts'] = subscripts - kwargs['kernel_dtype'] = einsum_dtype - kwargs['ncontract_inds'] = ncontract_inds - - if einsum_can_optimize: - kwargs['optimize'] = optimize - - # Update kwargs with atop parameters - kwargs['adjust_chunks'] = {ind: 1 for ind in contract_inds} - kwargs['dtype'] = dtype - # Introduce the contracted indices into the atop product # so that we get numpy arrays, not lists - result = atop(_einsum_kernel, tuple(outputs) + tuple(contract_inds), + result = atop(chunk.einsum, tuple(outputs) + tuple(contract_inds), *(a for ap in zip(ops, inputs) for a in ap), - **kwargs) + # atop parameters + adjust_chunks={ind: 1 for ind in contract_inds}, dtype=dtype, + # np.einsum parameters + subscripts=subscripts, kernel_dtype=einsum_dtype, + ncontract_inds=ncontract_inds, order=order, + casting=casting, **kwargs) # Now reduce over any extra contraction dimensions if ncontract_inds > 0: size = len(outputs) - return result.sum(axis=list(range(size, size + ncontract_inds))) + return 
result.sum(axis=list(range(size, size + ncontract_inds)), + split_every=split_every) return result diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/rechunk.py new/dask-0.17.5/dask/array/rechunk.py --- old/dask-0.17.4/dask/array/rechunk.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/array/rechunk.py 2018-05-17 00:43:27.000000000 +0200 @@ -191,12 +191,16 @@ >>> blockshape_dict_to_tuple(((4, 4), (5, 5)), {1: 3}) ((4, 4), (3, 3, 3, 1)) + >>> blockshape_dict_to_tuple(((4, 4), (5, 5)), {1: -1}) + ((4, 4), (10,)) + """ shape = tuple(map(sum, old_chunks)) new_chunks = list(old_chunks) for k, v in d.items(): - div = shape[k] // v - mod = shape[k] % v + if v == -1: + v = shape[k] + div, mod = divmod(shape[k], v) new_chunks[k] = (v,) * div + ((mod,) if mod else ()) return tuple(new_chunks) @@ -227,15 +231,17 @@ Parameters ---------- - x: dask array - chunks: tuple - The new block dimensions to create + x: dask array + Array to be rechunked. + chunks: int, tuple or dict + The new block dimensions to create. -1 indicates the full size of the + corresponding dimension. threshold: int - The graph growth factor under which we don't bother - introducing an intermediate step + The graph growth factor under which we don't bother introducing an + intermediate step. block_size_limit: int The maximum block size (in bytes) we want to produce during an - intermediate step + intermediate step. """ threshold = threshold or DEFAULT_THRESHOLD block_size_limit = block_size_limit or DEFAULT_BLOCK_SIZE_LIMIT diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/slicing.py new/dask-0.17.5/dask/array/slicing.py --- old/dask-0.17.4/dask/array/slicing.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/array/slicing.py 2018-05-17 00:43:27.000000000 +0200 @@ -3,10 +3,10 @@ from itertools import product import math from numbers import Integral, Number -from operator import add, getitem, itemgetter +from operator import getitem, itemgetter import numpy as np -from toolz import accumulate, memoize, merge, pluck, concat +from toolz import memoize, merge, pluck, concat from .. import core from .. import sharedict @@ -372,13 +372,16 @@ >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3)) {4: slice(-1, -12, -3)} """ + chunk_boundaries = np.cumsum(lengths) + if isinstance(index, Integral): - i = 0 - ind = index - lens = list(lengths) - while ind >= lens[0]: - i += 1 - ind -= lens.pop(0) + # use right-side search to be consistent with previous result + i = chunk_boundaries.searchsorted(index, side='right') + if i > 0: + # the very first chunk has no relative shift + ind = index - chunk_boundaries[i - 1] + else: + ind = index return {i: ind} assert isinstance(index, slice) @@ -391,7 +394,7 @@ start = index.start or 0 stop = index.stop if index.stop is not None else dim_shape else: - start = index.start or dim_shape - 1 + start = index.start if index.start is not None else dim_shape - 1 start = dim_shape - 1 if start >= dim_shape else start stop = -(dim_shape + 1) if index.stop is None else index.stop @@ -403,7 +406,19 @@ d = dict() if step > 0: - for i, length in enumerate(lengths): + istart = chunk_boundaries.searchsorted(start, side='right') + istop = chunk_boundaries.searchsorted(stop, side='left') + + # the bound is not exactly tight; make it tighter? 
+ istop = min(istop + 1, len(lengths)) + + # jump directly to istart + if istart > 0: + start = start - chunk_boundaries[istart - 1] + stop = stop - chunk_boundaries[istart - 1] + + for i in range(istart, istop): + length = lengths[i] if start < length and stop > 0: d[i] = slice(start, min(stop, length), step) start = (start - length) % step @@ -412,8 +427,16 @@ stop -= length else: rstart = start # running start - chunk_boundaries = list(accumulate(add, lengths)) - for i, chunk_stop in reversed(list(enumerate(chunk_boundaries))): + + istart = chunk_boundaries.searchsorted(start, side='left') + istop = chunk_boundaries.searchsorted(stop, side='right') + + # the bound is not exactly tight; make it tighter? + istart = min(istart + 1, len(chunk_boundaries) - 1) + istop = max(istop - 1, -1) + + for i in range(istart, istop, -1): + chunk_stop = chunk_boundaries[i] # create a chunk start and stop if i == 0: chunk_start = 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/tests/test_rechunk.py new/dask-0.17.5/dask/array/tests/test_rechunk.py --- old/dask-0.17.4/dask/array/tests/test_rechunk.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/array/tests/test_rechunk.py 2018-05-17 00:43:27.000000000 +0200 @@ -157,6 +157,10 @@ y = x.rechunk(chunks={0: (12, 12)}) assert y.chunks == ((12, 12), (8, 8, 8)) + x = da.ones((24, 24), chunks=(4, 8)) + y = x.rechunk(chunks={0: -1}) + assert y.chunks == ((24,), (8, 8, 8)) + def test_rechunk_with_empty_input(): x = da.ones((24, 24), chunks=(4, 8)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/array/tests/test_routines.py new/dask-0.17.5/dask/array/tests/test_routines.py --- old/dask-0.17.4/dask/array/tests/test_routines.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/array/tests/test_routines.py 2018-05-17 00:43:27.000000000 +0200 @@ -1407,6 +1407,19 @@ da.einsum(sig, *np_inputs, casting=casting)) +@pytest.mark.parametrize('split_every', [None, 2]) +def test_einsum_split_every(split_every): + np_inputs, da_inputs = _numpy_and_dask_inputs('a') + assert_eq(np.einsum('a', *np_inputs), + da.einsum('a', *da_inputs, split_every=split_every)) + + +def test_einsum_invalid_args(): + _, da_inputs = _numpy_and_dask_inputs('a') + with pytest.raises(TypeError): + da.einsum('a', *da_inputs, foo=1, bar=2) + + def test_einsum_broadcasting_contraction(): a = np.random.rand(1, 5, 4) b = np.random.rand(4, 6) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/core.py new/dask-0.17.5/dask/dataframe/core.py --- old/dask-0.17.4/dask/dataframe/core.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/core.py 2018-05-17 00:43:27.000000000 +0200 @@ -38,7 +38,8 @@ from .optimize import optimize from .utils import (meta_nonempty, make_meta, insert_meta_param_description, raise_on_meta_error, clear_known_categories, - is_categorical_dtype, has_known_categories, PANDAS_VERSION) + is_categorical_dtype, has_known_categories, PANDAS_VERSION, + index_summary) no_default = '__no_default__' @@ -2780,7 +2781,8 @@ bind_method(cls, name, meth) @insert_meta_param_description(pad=12) - def apply(self, func, axis=0, args=(), meta=no_default, **kwds): + def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, + args=(), meta=no_default, **kwds): """ Parallel version of pandas.DataFrame.apply This mimics the pandas version except for the 
following: @@ -2842,6 +2844,17 @@ """ axis = self._validate_axis(axis) + pandas_kwargs = { + 'axis': axis, + 'broadcast': broadcast, + 'raw': raw, + 'reduce': None, + } + + if PANDAS_VERSION >= '0.23.0': + kwds.setdefault('result_type', None) + + kwds.update(pandas_kwargs) if axis == 0: msg = ("dd.DataFrame.apply only supports axis=1\n" @@ -2857,10 +2870,9 @@ warnings.warn(msg) meta = _emulate(M.apply, self._meta_nonempty, func, - axis=axis, args=args, udf=True, **kwds) + args=args, udf=True, **kwds) - return map_partitions(M.apply, self, func, axis, - False, False, None, args, meta=meta, **kwds) + return map_partitions(M.apply, self, func, args=args, meta=meta, **kwds) @derived_from(pd.DataFrame) def applymap(self, func, meta='__no_default__'): @@ -2909,7 +2921,7 @@ if verbose: index = computations['index'] counts = computations['count'] - lines.append(index.summary()) + lines.append(index_summary(index)) lines.append('Data columns (total {} columns):'.format(len(self.columns))) if PANDAS_VERSION >= '0.20.0': @@ -2921,7 +2933,7 @@ column_info = [column_template.format(pprint_thing(x[0]), x[1], x[2]) for x in zip(self.columns, counts, self.dtypes)] else: - column_info = [self.columns.summary(name='Columns')] + column_info = [index_summary(self.columns, name='Columns')] lines.extend(column_info) dtype_counts = ['%s(%d)' % k for k in sorted(self.dtypes.value_counts().iteritems(), key=str)] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/rolling.py new/dask-0.17.5/dask/dataframe/rolling.py --- old/dask-0.17.4/dask/dataframe/rolling.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/rolling.py 2018-05-17 00:43:27.000000000 +0200 @@ -8,7 +8,7 @@ from ..base import tokenize from ..utils import M, funcname, derived_from from .core import _emulate -from .utils import make_meta +from .utils import make_meta, PANDAS_VERSION def overlap_chunk(func, prev_part, current_part, next_part, before, after, @@ -292,8 +292,19 @@ return self._call_method('quantile', quantile) @derived_from(pd_Rolling) - def apply(self, func, args=(), kwargs={}): - return self._call_method('apply', func, args=args, kwargs=kwargs) + def apply(self, func, args=(), kwargs={}, **kwds): + # TODO: In a future version of pandas this will change to + # raw=False. Think about inspecting the function signature and setting + # to that? + if PANDAS_VERSION >= '0.23.0': + kwds.setdefault("raw", None) + else: + if kwargs: + msg = ("Invalid argument to 'apply'. Keyword arguments " + "should be given as a dict to the 'kwargs' argument. 
") + raise TypeError(msg) + return self._call_method('apply', func, args=args, + kwargs=kwargs, **kwds) def __repr__(self): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_categorical.py new/dask-0.17.5/dask/dataframe/tests/test_categorical.py --- old/dask-0.17.4/dask/dataframe/tests/test_categorical.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/tests/test_categorical.py 2018-05-17 00:43:27.000000000 +0200 @@ -119,12 +119,14 @@ def test_categorize(): - meta = clear_known_categories(frames4[0]) + # rename y to y_ to avoid pandas future warning about ambiguous + # levels + meta = clear_known_categories(frames4[0]).rename(columns={'y': 'y_'}) ddf = dd.DataFrame({('unknown', i): df for (i, df) in enumerate(frames3)}, - 'unknown', meta, [None] * 4) + 'unknown', meta, [None] * 4).rename(columns={'y': 'y_'}) ddf = ddf.assign(w=ddf.w.cat.set_categories(['x', 'y', 'z'])) assert ddf.w.cat.known - assert not ddf.y.cat.known + assert not ddf.y_.cat.known assert not ddf.index.cat.known df = ddf.compute() @@ -132,27 +134,27 @@ known_index = index is not False # By default categorize object and unknown cat columns ddf2 = ddf.categorize(index=index) - assert ddf2.y.cat.known + assert ddf2.y_.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) # Specifying split_every works ddf2 = ddf.categorize(index=index, split_every=2) - assert ddf2.y.cat.known + assert ddf2.y_.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) # Specifying one column doesn't affect others ddf2 = ddf.categorize('v', index=index) - assert not ddf2.y.cat.known + assert not ddf2.y_.cat.known assert ddf2.v.cat.known assert ddf2.index.cat.known == known_index assert_eq(ddf2, df.astype({'v': 'category'}), check_categorical=False) - ddf2 = ddf.categorize('y', index=index) - assert ddf2.y.cat.known + ddf2 = ddf.categorize('y_', index=index) + assert ddf2.y_.cat.known assert ddf2.v.dtype == 'object' assert ddf2.index.cat.known == known_index assert_eq(ddf2, df) @@ -188,7 +190,7 @@ assert ddf.categorize(index=False) is ddf # Non-object dtype - ddf = dd.from_pandas(df.set_index(df.A), npartitions=5) + ddf = dd.from_pandas(df.set_index(df.A.rename('idx')), npartitions=5) df = ddf.compute() ddf2 = ddf.categorize(index=True) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_dataframe.py new/dask-0.17.5/dask/dataframe/tests/test_dataframe.py --- old/dask-0.17.4/dask/dataframe/tests/test_dataframe.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/tests/test_dataframe.py 2018-05-17 00:43:27.000000000 +0200 @@ -1,4 +1,5 @@ import sys +import textwrap from distutils.version import LooseVersion from itertools import product from operator import add @@ -1612,12 +1613,19 @@ # count dtypes tm.assert_series_equal(a.get_dtype_counts(), df.get_dtype_counts()) - tm.assert_series_equal(a.get_ftype_counts(), df.get_ftype_counts()) tm.assert_series_equal(result.get_dtype_counts(), expected.get_dtype_counts()) - tm.assert_series_equal(result.get_ftype_counts(), - expected.get_ftype_counts()) + + if PANDAS_VERSION >= '0.23.0': + ctx = pytest.warns(FutureWarning) + else: + ctx = pytest.warns(None) + + with ctx: + tm.assert_series_equal(a.get_ftype_counts(), 
df.get_ftype_counts()) + tm.assert_series_equal(result.get_ftype_counts(), + expected.get_ftype_counts()) def test_deterministic_apply_concat_apply_names(): @@ -2097,7 +2105,7 @@ def test_cov_corr_mixed(): size = 1000 - d = {'dates' : pd.date_range('2015-01-01', periods=size, frequency='1T'), + d = {'dates' : pd.date_range('2015-01-01', periods=size, freq='1T'), 'unique_id' : np.arange(0, size), 'ints' : np.random.randint(0, size, size=size), 'floats' : np.random.randn(size), @@ -2415,9 +2423,11 @@ buf = StringIO() g.info(buf, verbose=False) - assert buf.getvalue() == unicode("""<class 'dask.dataframe.core.DataFrame'> -Columns: 2 entries, (C, count) to (C, sum) -dtypes: int64(2)""") + expected = unicode(textwrap.dedent("""\ + <class 'dask.dataframe.core.DataFrame'> + Columns: 2 entries, ('C', 'count') to ('C', 'sum') + dtypes: int64(2)""")) + assert buf.getvalue() == expected def test_categorize_info(): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_indexing.py new/dask-0.17.5/dask/dataframe/tests/test_indexing.py --- old/dask-0.17.4/dask/dataframe/tests/test_indexing.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/tests/test_indexing.py 2018-05-17 00:43:27.000000000 +0200 @@ -8,7 +8,7 @@ import dask.dataframe as dd from dask.dataframe.indexing import _coerce_loc_index -from dask.dataframe.utils import assert_eq, make_meta +from dask.dataframe.utils import assert_eq, make_meta, PANDAS_VERSION dsk = {('x', 0): pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, @@ -33,18 +33,30 @@ assert_eq(d.loc[:8], full.loc[:8]) assert_eq(d.loc[3:], full.loc[3:]) assert_eq(d.loc[[5]], full.loc[[5]]) - assert_eq(d.loc[[3, 4, 1, 8]], full.loc[[3, 4, 1, 8]]) - assert_eq(d.loc[[3, 4, 1, 9]], full.loc[[3, 4, 1, 9]]) - assert_eq(d.loc[np.array([3, 4, 1, 9])], full.loc[np.array([3, 4, 1, 9])]) + + if PANDAS_VERSION >= '0.23.0': + expected_warning = FutureWarning + else: + expected_warning = None + + with pytest.warns(expected_warning): + assert_eq(d.loc[[3, 4, 1, 8]], full.loc[[3, 4, 1, 8]]) + with pytest.warns(expected_warning): + assert_eq(d.loc[[3, 4, 1, 9]], full.loc[[3, 4, 1, 9]]) + with pytest.warns(expected_warning): + assert_eq(d.loc[np.array([3, 4, 1, 9])], full.loc[np.array([3, 4, 1, 9])]) assert_eq(d.a.loc[5], full.a.loc[5:5]) assert_eq(d.a.loc[3:8], full.a.loc[3:8]) assert_eq(d.a.loc[:8], full.a.loc[:8]) assert_eq(d.a.loc[3:], full.a.loc[3:]) assert_eq(d.a.loc[[5]], full.a.loc[[5]]) - assert_eq(d.a.loc[[3, 4, 1, 8]], full.a.loc[[3, 4, 1, 8]]) - assert_eq(d.a.loc[[3, 4, 1, 9]], full.a.loc[[3, 4, 1, 9]]) - assert_eq(d.a.loc[np.array([3, 4, 1, 9])], full.a.loc[np.array([3, 4, 1, 9])]) + with pytest.warns(expected_warning): + assert_eq(d.a.loc[[3, 4, 1, 8]], full.a.loc[[3, 4, 1, 8]]) + with pytest.warns(expected_warning): + assert_eq(d.a.loc[[3, 4, 1, 9]], full.a.loc[[3, 4, 1, 9]]) + with pytest.warns(expected_warning): + assert_eq(d.a.loc[np.array([3, 4, 1, 9])], full.a.loc[np.array([3, 4, 1, 9])]) assert_eq(d.a.loc[[]], full.a.loc[[]]) assert_eq(d.a.loc[np.array([])], full.a.loc[np.array([])]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_rolling.py new/dask-0.17.5/dask/dataframe/tests/test_rolling.py --- old/dask-0.17.4/dask/dataframe/tests/test_rolling.py 2018-03-06 00:14:39.000000000 +0100 +++ new/dask-0.17.5/dask/dataframe/tests/test_rolling.py 2018-05-17 00:43:27.000000000 +0200 @@ -3,7 +3,7 @@ import numpy as 
np import dask.dataframe as dd -from dask.dataframe.utils import assert_eq +from dask.dataframe.utils import assert_eq, PANDAS_VERSION N = 40 df = pd.DataFrame({'a': np.random.randn(N).cumsum(), @@ -122,18 +122,28 @@ # DataFrame prolling = df.rolling(window, center=center) drolling = ddf.rolling(window, center=center) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + if method == 'apply' and PANDAS_VERSION >= '0.23.0': + kwargs = {'raw': False} + else: + kwargs = {} + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) # Series prolling = df.a.rolling(window, center=center) drolling = ddf.a.rolling(window, center=center) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) +@pytest.mark.skipif(PANDAS_VERSION >= '0.23.0', reason="Raw is allowed.") +def test_rolling_raw_pandas_lt_0230_raises(): + with pytest.raises(TypeError): + df.rolling(2).apply(mad, raw=True) + + def test_rolling_raises(): df = pd.DataFrame({'a': np.random.randn(25).cumsum(), 'b': np.random.randint(100, size=(25,))}) @@ -209,17 +219,21 @@ @pytest.mark.parametrize('window', ['1S', '2S', '3S', pd.offsets.Second(5)]) def test_time_rolling_methods(method, args, window, check_less_precise): # DataFrame + if method == 'apply' and PANDAS_VERSION >= '0.23.0': + kwargs = {"raw": False} + else: + kwargs = {} prolling = ts.rolling(window) drolling = dts.rolling(window) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) # Series prolling = ts.a.rolling(window) drolling = dts.a.rolling(window) - assert_eq(getattr(prolling, method)(*args), - getattr(drolling, method)(*args), + assert_eq(getattr(prolling, method)(*args, **kwargs), + getattr(drolling, method)(*args, **kwargs), check_less_precise=check_less_precise) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/tests/test_ufunc.py new/dask-0.17.5/dask/dataframe/tests/test_ufunc.py --- old/dask-0.17.4/dask/dataframe/tests/test_ufunc.py 2018-05-03 23:39:27.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/tests/test_ufunc.py 2018-05-17 00:43:27.000000000 +0200 @@ -345,17 +345,17 @@ assert isinstance(dafunc(dask, darray), dask_type) assert isinstance(dafunc(darray, dask), dask_type) - tm.assert_numpy_array_equal(dafunc(dask, darray).compute().as_matrix(), - npfunc(pandas.as_matrix(), darray).compute()) + tm.assert_numpy_array_equal(dafunc(dask, darray).compute().values, + npfunc(pandas.values, darray).compute()) # applying NumPy ufunc is lazy assert isinstance(npfunc(dask, darray), dask_type) assert isinstance(npfunc(darray, dask), dask_type) - tm.assert_numpy_array_equal(npfunc(dask, darray).compute().as_matrix(), - npfunc(pandas.as_matrix(), darray.compute())) - tm.assert_numpy_array_equal(npfunc(darray, dask).compute().as_matrix(), - npfunc(darray.compute(), pandas.as_matrix())) + tm.assert_numpy_array_equal(npfunc(dask, darray).compute().values, + npfunc(pandas.values, darray.compute())) + tm.assert_numpy_array_equal(npfunc(darray, dask).compute().values, + npfunc(darray.compute(), pandas.values)) @pytest.mark.parametrize('redfunc', ['sum', 'prod', 'min', 
'max', 'mean']) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask/dataframe/utils.py new/dask-0.17.5/dask/dataframe/utils.py --- old/dask-0.17.4/dask/dataframe/utils.py 2018-04-30 23:52:38.000000000 +0200 +++ new/dask-0.17.5/dask/dataframe/utils.py 2018-05-17 00:43:27.000000000 +0200 @@ -498,6 +498,22 @@ errmsg)) +def index_summary(idx, name=None): + """Summarized representation of an Index. + """ + n = len(idx) + if name is None: + name = idx.__class__.__name__ + if n: + head = idx[0] + tail = idx[-1] + summary = ', {} to {}'.format(head, tail) + else: + summary = '' + + return "{}: {} entries{}".format(name, n, summary) + + ############################################################### # Testing ############################################################### diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask.egg-info/PKG-INFO new/dask-0.17.5/dask.egg-info/PKG-INFO --- old/dask-0.17.4/dask.egg-info/PKG-INFO 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/dask.egg-info/PKG-INFO 2018-05-17 00:48:52.000000000 +0200 @@ -1,12 +1,11 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: dask -Version: 0.17.4 +Version: 0.17.5 Summary: Parallel PyData with Task Scheduling Home-page: http://github.com/dask/dask/ -Author: Matthew Rocklin -Author-email: mrock...@gmail.com +Maintainer: Matthew Rocklin +Maintainer-email: mrock...@gmail.com License: BSD -Description-Content-Type: UNKNOWN Description: Dask ==== @@ -38,3 +37,9 @@ Keywords: task-scheduling parallel numpy pandas pydata Platform: UNKNOWN +Provides-Extra: array +Provides-Extra: delayed +Provides-Extra: complete +Provides-Extra: bag +Provides-Extra: dataframe +Provides-Extra: distributed diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/dask.egg-info/SOURCES.txt new/dask-0.17.5/dask.egg-info/SOURCES.txt --- old/dask-0.17.4/dask.egg-info/SOURCES.txt 2018-05-03 23:45:10.000000000 +0200 +++ new/dask-0.17.5/dask.egg-info/SOURCES.txt 2018-05-17 00:48:52.000000000 +0200 @@ -190,6 +190,7 @@ dask/tests/test_threaded.py dask/tests/test_utils.py docs/Makefile +docs/README.rst docs/make.bat docs/source/api.rst docs/source/array-api.rst diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/docs/README.rst new/dask-0.17.5/docs/README.rst --- old/dask-0.17.4/docs/README.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/dask-0.17.5/docs/README.rst 2017-04-24 17:38:09.000000000 +0200 @@ -0,0 +1,10 @@ +To build a local copy of the dask docs, install the programs in +requirements-docs.txt and run 'make html'. 
If you use the conda package manager +these commands suffice:: + + git clone g...@github.com:dask/dask.git + cd dask/docs + conda create -n daskdocs --file requirements-docs.txt + source activate daskdocs + make html + open build/html/index.html diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-0.17.4/docs/source/changelog.rst new/dask-0.17.5/docs/source/changelog.rst --- old/dask-0.17.4/docs/source/changelog.rst 2018-05-03 23:43:54.000000000 +0200 +++ new/dask-0.17.5/docs/source/changelog.rst 2018-05-17 00:43:27.000000000 +0200 @@ -1,27 +1,21 @@ Changelog ========= - -0.18.0 / 2018-MM-DD +0.17.5 / 2018-05-16 ------------------- Array +++++ -- +- Fix ``rechunk`` with chunksize of -1 in a dict (:pr:`3469`) `Stephan Hoyer`_ +- ``einsum`` now accepts the ``split_every`` parameter (:pr:`3396`) `Guido Imperiale`_ +- Improved slicing performance (:pr:`3469`) `Yu Feng`_ -Dataframe +DataFrame +++++++++ -Bag -+++ - -- - -Core -++++ +- Compatibility with pandas 0.23.0 (:pr:`3499`) `Tom Augspurger`_ -- 0.17.4 / 2018-05-03 -------------------