Hello community, here is the log from the commit of package python-dask for openSUSE:Factory checked in at 2020-02-10 21:53:30 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-dask (Old) and /work/SRC/openSUSE:Factory/.python-dask.new.26092 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask" Mon Feb 10 21:53:30 2020 rev:28 rq:772459 version:2.10.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-dask/python-dask.changes 2020-01-19 20:58:26.624081961 +0100 +++ /work/SRC/openSUSE:Factory/.python-dask.new.26092/python-dask.changes 2020-02-10 21:53:35.962196826 +0100 @@ -1,0 +2,26 @@ +Sat Feb 8 21:45:22 UTC 2020 - Arun Persaud <[email protected]> + +- update to version 2.10.1: + * Fix Pandas 1.0 version comparison (:pr:`5851`) Tom Augspurger + * Fix typo in distributed diagnostics documentation (:pr:`5841`) + Gerrit Holl + +- changes from version 2.10.0: + * Support for pandas 1.0's new BooleanDtype and StringDtype + (:pr:`5815`) Tom Augspurger + * Compatibility with pandas 1.0's API breaking changes and + deprecations (:pr:`5792`) Tom Augspurger + * Fixed non-deterministic tokenization of some extension-array + backed pandas objects (:pr:`5813`) Tom Augspurger + * Fixed handling of dataclass class objects in collections + (:pr:`5812`) Matteo De Wint + * Fixed resampling with tz-aware dates when one of the endpoints + fell in a non-existent time (:pr:`5807`) dfonnegra + * Delay initial Zarr dataset creation until the computation occurs + (:pr:`5797`) Chris Roat + * Use parquet dataset statistics in more cases with the pyarrow + engine (:pr:`5799`) Richard J Zamora + * Fixed exception in groupby.std() when some of the keys were large + integers (:pr:`5737`) H. 
Thomson Comer + +------------------------------------------------------------------- Old: ---- dask-2.9.2.tar.gz New: ---- dask-2.10.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-dask.spec ++++++ --- /var/tmp/diff_new_pack.Ihkeug/_old 2020-02-10 21:53:36.986197392 +0100 +++ /var/tmp/diff_new_pack.Ihkeug/_new 2020-02-10 21:53:36.986197392 +0100 @@ -27,7 +27,7 @@ %endif %define skip_python2 1 Name: python-dask%{psuffix} -Version: 2.9.2 +Version: 2.10.1 Release: 0 Summary: Minimal task scheduling abstraction License: BSD-3-Clause ++++++ dask-2.9.2.tar.gz -> dask-2.10.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/PKG-INFO new/dask-2.10.1/PKG-INFO --- old/dask-2.9.2/PKG-INFO 2020-01-17 00:50:13.000000000 +0100 +++ new/dask-2.10.1/PKG-INFO 2020-01-30 17:44:57.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: dask -Version: 2.9.2 +Version: 2.10.1 Summary: Parallel PyData with Task Scheduling Home-page: https://github.com/dask/dask/ Maintainer: Matthew Rocklin diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/_version.py new/dask-2.10.1/dask/_version.py --- old/dask-2.9.2/dask/_version.py 2020-01-17 00:50:13.000000000 +0100 +++ new/dask-2.10.1/dask/_version.py 2020-01-30 17:44:57.000000000 +0100 @@ -11,8 +11,8 @@ { "dirty": false, "error": null, - "full-revisionid": "e3f80df7fff7627082688b7cccff1e9131001446", - "version": "2.9.2" + "full-revisionid": "4ddcc9374533e7765f341f86edfedf5c8d82df8c", + "version": "2.10.1" } ''' # END VERSION_JSON diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/array/core.py new/dask-2.10.1/dask/array/core.py --- old/dask-2.9.2/dask/array/core.py 2020-01-15 22:59:20.000000000 +0100 +++ new/dask-2.10.1/dask/array/core.py 2020-01-28 18:29:39.000000000 +0100 @@ 
-619,7 +619,7 @@ concatenate=True, align_arrays=False, meta=meta, - **kwargs + **kwargs, ) if has_keyword(func, "block_id") or has_keyword(func, "block_info") or drop_axis: @@ -2851,7 +2851,8 @@ paths) overwrite: bool If given array already exists, overwrite=False will cause an error, - where overwrite=True will replace the existing data. + where overwrite=True will replace the existing data. Note that this + check is done at computation time, not during graph creation. compute, return_stored: see ``store()`` kwargs: passed to the ``zarr.create()`` function, e.g., compression options @@ -2902,14 +2903,22 @@ mapper = url chunks = [c[0] for c in arr.chunks] - z = zarr.create( + + # The zarr.create function has the side-effect of immediately + # creating metadata on disk. This may not be desired, + # particularly if compute=False. The caller may be creating many + # arrays on a slow filesystem, with the desire that any I/O be + # sharded across workers (not done serially on the originating + # machine). Or the caller may decide later to not to do this + # computation, and so nothing should be written to disk. 
+ z = delayed(zarr.create)( shape=arr.shape, chunks=chunks, dtype=arr.dtype, store=mapper, path=component, overwrite=overwrite, - **kwargs + **kwargs, ) return arr.store(z, lock=False, compute=compute, return_stored=return_stored) @@ -3865,7 +3874,7 @@ (a, tuple(range(a.ndim)[::-1]) if not is_scalar_for_elemwise(a) else None) for a in args ), - **blockwise_kwargs + **blockwise_kwargs, ) return handle_out(out, result) @@ -4381,7 +4390,7 @@ shape=x.shape, dtype=x.dtype, chunks=tuple([c[0] for c in x.chunks]) if chunks is True else chunks, - **kwargs + **kwargs, ) for dp, x in data.items() ] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/array/svg.py new/dask-2.10.1/dask/array/svg.py --- old/dask-2.9.2/dask/array/svg.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.10.1/dask/array/svg.py 2020-01-28 18:29:39.000000000 +0100 @@ -47,9 +47,9 @@ lines, (min_x, max_x, min_y, max_y) = svg_grid(x, y, offset=offset, skew=skew) - header = '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n' % ( - max_x + 50, - max_y + 50, + header = ( + '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n' + % (max_x + 50, max_y + 50) ) footer = "\n</svg>" @@ -85,9 +85,9 @@ z, y, offset=(ox + max_x + 10, oy + max_x), skew=(0, 0) ) - header = '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n' % ( - max_z + 50, - max_y + 50, + header = ( + '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n' + % (max_z + 50, max_y + 50) ) footer = "\n</svg>" @@ -152,9 +152,9 @@ out.append(o) - header = '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n' % ( - left, - total_height, + header = ( + '<svg width="%d" height="%d" style="stroke:rgb(0,0,0);stroke-width:1" >\n' + % (left, total_height) ) footer = "\n</svg>" return header + "\n\n".join(out) + footer diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/dask-2.9.2/dask/array/tests/test_array_core.py new/dask-2.10.1/dask/array/tests/test_array_core.py --- old/dask-2.9.2/dask/array/tests/test_array_core.py 2020-01-15 22:59:20.000000000 +0100 +++ new/dask-2.10.1/dask/array/tests/test_array_core.py 2020-01-28 18:29:39.000000000 +0100 @@ -3742,6 +3742,18 @@ assert a2.chunks == a.chunks +def test_to_zarr_delayed_creates_no_metadata(): + pytest.importorskip("zarr") + with tmpdir() as d: + a = da.from_array([42]) + result = a.to_zarr(d, compute=False) + assert not os.listdir(d) # No .zarray file + # Verify array still created upon compute. + result.compute() + a2 = da.from_zarr(d) + assert_eq(a, a2) + + def test_zarr_existing_array(): zarr = pytest.importorskip("zarr") c = (1, 1) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/base.py new/dask-2.10.1/dask/base.py --- old/dask-2.9.2/dask/base.py 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.10.1/dask/base.py 2020-01-28 18:29:39.000000000 +0100 @@ -292,7 +292,7 @@ tsk = (typ, [_unpack(i) for i in expr]) elif typ in (dict, OrderedDict): tsk = (typ, [[_unpack(k), _unpack(v)] for k, v in expr.items()]) - elif is_dataclass(expr): + elif is_dataclass(expr) and not isinstance(expr, type): tsk = ( apply, typ, @@ -739,18 +739,44 @@ @normalize_token.register_lazy("pandas") def register_pandas(): import pandas as pd + from dask.dataframe._compat import PANDAS_GT_0240 @normalize_token.register(pd.Index) def normalize_index(ind): - return [ind.name, normalize_token(ind.values)] + if PANDAS_GT_0240: + values = ind.array + else: + values = ind.values + return [ind.name, normalize_token(values)] + + @normalize_token.register(pd.MultiIndex) + def normalize_index(ind): + codes = ind.codes if PANDAS_GT_0240 else ind.levels + return ( + [ind.name] + + [normalize_token(x) for x in ind.levels] + + [normalize_token(x) for x in codes] + ) @normalize_token.register(pd.Categorical) def 
normalize_categorical(cat): - return [ - normalize_token(cat.codes), - normalize_token(cat.categories), - cat.ordered, - ] + return [normalize_token(cat.codes), normalize_token(cat.dtype)] + + if PANDAS_GT_0240: + + @normalize_token.register(pd.arrays.PeriodArray) + @normalize_token.register(pd.arrays.DatetimeArray) + @normalize_token.register(pd.arrays.TimedeltaArray) + def normalize_period_array(arr): + return [normalize_token(arr.asi8), normalize_token(arr.dtype)] + + @normalize_token.register(pd.arrays.IntervalArray) + def normalize_interval_array(arr): + return [ + normalize_token(arr.left), + normalize_token(arr.right), + normalize_token(arr.closed), + ] @normalize_token.register(pd.Series) def normalize_series(s): @@ -767,6 +793,21 @@ data += [df.columns, df.index] return list(map(normalize_token, data)) + @normalize_token.register(pd.api.extensions.ExtensionArray) + def normalize_extension_array(arr): + import numpy as np + + return normalize_token(np.asarray(arr)) + + # Dtypes + @normalize_token.register(pd.api.types.CategoricalDtype) + def normalize_categorical_dtype(dtype): + return [normalize_token(dtype.categories), normalize_token(dtype.ordered)] + + @normalize_token.register(pd.api.extensions.ExtensionDtype) + def normalize_period_dtype(dtype): + return normalize_token(dtype.name) + @normalize_token.register_lazy("numpy") def register_numpy(): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/_compat.py new/dask-2.10.1/dask/dataframe/_compat.py --- old/dask-2.9.2/dask/dataframe/_compat.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/_compat.py 2020-01-30 17:30:35.000000000 +0100 @@ -9,7 +9,7 @@ PANDAS_GT_0230 = PANDAS_VERSION >= LooseVersion("0.23.0") PANDAS_GT_0240 = PANDAS_VERSION >= LooseVersion("0.24.0") PANDAS_GT_0250 = PANDAS_VERSION >= LooseVersion("0.25.0") -PANDAS_GT_100 = PANDAS_VERSION >= LooseVersion("1.0.0rc0") +PANDAS_GT_100 = PANDAS_VERSION 
>= LooseVersion("1.0.0") HAS_INT_NA = PANDAS_GT_0240 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/_dtypes.py new/dask-2.10.1/dask/dataframe/_dtypes.py --- old/dask-2.9.2/dask/dataframe/_dtypes.py 1970-01-01 01:00:00.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/_dtypes.py 2020-01-28 18:29:39.000000000 +0100 @@ -0,0 +1,21 @@ +import pandas as pd +from ._compat import PANDAS_GT_100 +from .extensions import make_array_nonempty, make_scalar + +if PANDAS_GT_100: + + @make_array_nonempty.register(pd.StringDtype) + def _(dtype): + return pd.array(["a", pd.NA], dtype=dtype) + + @make_scalar.register(str) + def _(x): + return "s" + + @make_array_nonempty.register(pd.BooleanDtype) + def _dtype(dtype): + return pd.array([True, pd.NA], dtype=dtype) + + @make_scalar.register(bool) + def _(x): + return True diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/core.py new/dask-2.10.1/dask/dataframe/core.py --- old/dask-2.9.2/dask/dataframe/core.py 2020-01-15 02:08:19.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/core.py 2020-01-30 16:12:49.000000000 +0100 @@ -4162,6 +4162,8 @@ ) if verbose: + import textwrap + index = computations["index"] counts = computations["count"] lines.append(index_summary(index)) @@ -4169,12 +4171,36 @@ from pandas.io.formats.printing import pprint_thing - space = max([len(pprint_thing(k)) for k in self.columns]) + 3 - column_template = "{!s:<%d} {} non-null {}" % space + space = max([len(pprint_thing(k)) for k in self.columns]) + 1 + column_width = max(space, 7) + + header = ( + textwrap.dedent( + """\ + # {{column:<{column_width}}} Non-Null Count Dtype + --- {{underl:<{column_width}}} -------------- -----""" + ) + .format(column_width=column_width) + .format(column="Column", underl="------") + ) + column_template = textwrap.dedent( + """\ + {{i:^3}} {{name:<{column_width}}} {{count}} non-null 
{{dtype}}""".format( + column_width=column_width + ) + ) column_info = [ - column_template.format(pprint_thing(x[0]), x[1], x[2]) - for x in zip(self.columns, counts, self.dtypes) + column_template.format( + i=pprint_thing(i), + name=pprint_thing(name), + count=pprint_thing(count), + dtype=pprint_thing(dtype), + ) + for i, (name, count, dtype) in enumerate( + zip(self.columns, counts, self.dtypes) + ) ] + lines.extend(header.split("\n")) else: column_info = [index_summary(self.columns, name="Columns")] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/groupby.py new/dask-2.10.1/dask/dataframe/groupby.py --- old/dask-2.9.2/dask/dataframe/groupby.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/groupby.py 2020-01-28 18:29:39.000000000 +0100 @@ -298,18 +298,18 @@ df = df.to_frame() df = df.copy() - cols = df._get_numeric_data().columns g = _groupby_raise_unaligned(df, by=index) x = g.sum() n = g[x.columns].count().rename(columns=lambda c: (c, "-count")) + cols = x.columns df[cols] = df[cols] ** 2 + g2 = _groupby_raise_unaligned(df, by=index) x2 = g2.sum().rename(columns=lambda c: (c, "-x2")) - x2.index = x.index return concat([x, x2, n], axis=1) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/io/parquet/arrow.py new/dask-2.10.1/dask/dataframe/io/parquet/arrow.py --- old/dask-2.9.2/dask/dataframe/io/parquet/arrow.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/io/parquet/arrow.py 2020-01-28 18:29:39.000000000 +0100 @@ -215,9 +215,12 @@ if ( gather_statistics is None and dataset.metadata - and dataset.metadata.num_row_groups == len(pieces) + and dataset.metadata.num_row_groups >= len(pieces) ): gather_statistics = True + # Don't gather stats by default if this is a partitioned dataset + if dataset.metadata.num_row_groups != len(pieces) and partitions: + gather_statistics = False if 
not pieces: gather_statistics = False diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/io/tests/test_csv.py new/dask-2.10.1/dask/dataframe/io/tests/test_csv.py --- old/dask-2.9.2/dask/dataframe/io/tests/test_csv.py 2020-01-17 00:23:43.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/io/tests/test_csv.py 2020-01-28 18:29:39.000000000 +0100 @@ -617,7 +617,7 @@ c,c """ ) - dtype = pd.api.types.CategoricalDtype(["a", "b", "c"]) + dtype = pd.api.types.CategoricalDtype(["a", "b", "c"], ordered=False) with filetexts({"foo.1.csv": text1, "foo.2.csv": text2}): result = dd.read_csv("foo.*.csv", dtype={"A": "category", "B": "category"}) assert result.A.cat.known is False @@ -654,7 +654,9 @@ assert_eq(result, expected) # Specify "unknown" categories - result = dd.read_csv("foo.*.csv", dtype=pd.api.types.CategoricalDtype()) + result = dd.read_csv( + "foo.*.csv", dtype=pd.api.types.CategoricalDtype(ordered=False) + ) assert result.A.cat.known is False result = dd.read_csv("foo.*.csv", dtype="category") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/io/tests/test_hdf.py new/dask-2.10.1/dask/dataframe/io/tests/test_hdf.py --- old/dask-2.9.2/dask/dataframe/io/tests/test_hdf.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/io/tests/test_hdf.py 2020-01-30 17:30:35.000000000 +0100 @@ -13,11 +13,6 @@ from dask.dataframe.utils import assert_eq -skip_pandas_rc = pytest.mark.xfail( - dd._compat.PANDAS_GT_100, reason="https://github.com/pandas-dev/pandas/issues/30962" -) - - def test_to_hdf(): pytest.importorskip("tables") df = pd.DataFrame( @@ -49,7 +44,6 @@ tm.assert_frame_equal(df, out[:]) -@skip_pandas_rc def test_to_hdf_multiple_nodes(): pytest.importorskip("tables") df = pd.DataFrame( @@ -130,7 +124,6 @@ assert_eq(df16, out) -@skip_pandas_rc def test_to_hdf_multiple_files(): pytest.importorskip("tables") df = 
pd.DataFrame( @@ -214,7 +207,6 @@ assert_eq(df, out) -@skip_pandas_rc def test_to_hdf_modes_multiple_nodes(): pytest.importorskip("tables") df = pd.DataFrame( @@ -263,7 +255,6 @@ assert_eq(df.append(df), out) -@skip_pandas_rc def test_to_hdf_modes_multiple_files(): pytest.importorskip("tables") df = pd.DataFrame( @@ -376,7 +367,6 @@ @pytest.mark.slow -@skip_pandas_rc def test_to_hdf_lock_delays(): pytest.importorskip("tables") df16 = pd.DataFrame( @@ -468,7 +458,6 @@ @pytest.mark.parametrize("scheduler", ["sync", "threads", "processes"]) @pytest.mark.parametrize("npartitions", [1, 4, 10]) -@skip_pandas_rc def test_to_hdf_schedulers(scheduler, npartitions): pytest.importorskip("tables") df = pd.DataFrame( @@ -620,7 +609,6 @@ (pd.Series([1, 2, 3, 4], name="a"), tm.assert_series_equal), ], ) -@skip_pandas_rc def test_read_hdf(data, compare): pytest.importorskip("tables") with tmpfile("h5") as fn: @@ -656,7 +644,6 @@ compare(a.compute(), sorted_data) -@skip_pandas_rc def test_read_hdf_multiply_open(): """Test that we can read from a file that's already opened elsewhere in read-only mode.""" @@ -670,7 +657,6 @@ dd.read_hdf(fn, "/data", chunksize=2, mode="r") -@skip_pandas_rc def test_read_hdf_multiple(): pytest.importorskip("tables") df = pd.DataFrame( @@ -724,7 +710,6 @@ assert_eq(a, r) -@skip_pandas_rc def test_read_hdf_start_stop_values(): pytest.importorskip("tables") df = pd.DataFrame( @@ -743,7 +728,6 @@ dd.read_hdf(fn, "/data", chunksize=-1) -@skip_pandas_rc def test_hdf_globbing(): pytest.importorskip("tables") df = pd.DataFrame( @@ -781,7 +765,6 @@ tm.assert_frame_equal(res.compute(), pd.concat([df] * 3)) -@skip_pandas_rc def test_hdf_file_list(): pytest.importorskip("tables") df = pd.DataFrame( @@ -798,7 +781,6 @@ tm.assert_frame_equal(res.compute(), df) -@skip_pandas_rc def test_read_hdf_pattern_pathlike(): pytest.importorskip("tables") df = pd.DataFrame( @@ -826,7 +808,6 @@ assert_eq(res, ddf) -@skip_pandas_rc def test_read_hdf_doesnt_segfault(): 
pytest.importorskip("tables") with tmpfile("h5") as fn: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/io/tests/test_parquet.py new/dask-2.10.1/dask/dataframe/io/tests/test_parquet.py --- old/dask-2.9.2/dask/dataframe/io/tests/test_parquet.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/io/tests/test_parquet.py 2020-01-28 18:29:39.000000000 +0100 @@ -2095,13 +2095,7 @@ tmp, engine="pyarrow", row_group_size=100 ) - ddf3 = dd.read_parquet( - tmp, - engine="pyarrow", - gather_statistics=True, - split_row_groups=True, - chunksize=1, - ) + ddf3 = dd.read_parquet(tmp, engine="pyarrow", split_row_groups=True, chunksize=1) assert ddf3.npartitions == 4 ddf3 = dd.read_parquet( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tests/test_accessors.py new/dask-2.10.1/dask/dataframe/tests/test_accessors.py --- old/dask-2.9.2/dask/dataframe/tests/test_accessors.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_accessors.py 2020-01-28 18:29:39.000000000 +0100 @@ -81,6 +81,10 @@ index=["E", "f", "g", "h"], ) + if dd._compat.PANDAS_GT_100: + df["string_col"] = df["str_col"].astype("string") + df.loc["E", "string_col"] = pd.NA + ddf = dd.from_pandas(df, 2) return df, ddf @@ -121,6 +125,8 @@ # implemented methods are present in tab completion assert "upper" in dir(ddf.str_col.str) + if dd._compat.PANDAS_GT_100: + assert "upper" in dir(ddf.string_col.str) assert "upper" in dir(ddf.index.str) # not implemented methods don't show up @@ -131,11 +137,19 @@ assert_eq(ddf.str_col.str.upper(), df.str_col.str.upper()) assert set(ddf.str_col.str.upper().dask) == set(ddf.str_col.str.upper().dask) + if dd._compat.PANDAS_GT_100: + assert_eq(ddf.string_col.str.upper(), df.string_col.str.upper()) + assert set(ddf.string_col.str.upper().dask) == set( + ddf.string_col.str.upper().dask + ) + 
assert_eq(ddf.index.str.upper(), df.index.str.upper()) assert set(ddf.index.str.upper().dask) == set(ddf.index.str.upper().dask) # make sure to pass thru args & kwargs assert_eq(ddf.str_col.str.contains("a"), df.str_col.str.contains("a")) + if dd._compat.PANDAS_GT_100: + assert_eq(ddf.string_col.str.contains("a"), df.string_col.str.contains("a")) assert set(ddf.str_col.str.contains("a").dask) == set( ddf.str_col.str.contains("a").dask ) @@ -242,3 +256,10 @@ ds = dd.from_pandas(s, npartitions=2) ds.str.split(n=10, expand=True).compute() + + +@pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="No StringDtype") +def test_string_nullable_types(df_ddf): + df, ddf = df_ddf + assert_eq(ddf.string_col.str.count("A"), df.string_col.str.count("A")) + assert_eq(ddf.string_col.str.isalpha(), df.string_col.str.isalpha()) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tests/test_boolean.py new/dask-2.10.1/dask/dataframe/tests/test_boolean.py --- old/dask-2.9.2/dask/dataframe/tests/test_boolean.py 1970-01-01 01:00:00.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_boolean.py 2020-01-28 18:29:39.000000000 +0100 @@ -0,0 +1,37 @@ +import pandas as pd +import pytest + +import dask.dataframe as dd + + +pytestmark = pytest.mark.skipif( + not dd._compat.PANDAS_GT_100, reason="BooleanArray added in 1.0.0" +) + + +def test_meta(): + values = pd.array([True, False, None], dtype="boolean") + ds = dd.from_pandas(pd.Series(values), 2) + assert ds.dtype == pd.BooleanDtype() + + dd.utils.assert_eq(ds._meta_nonempty, pd.Series([True, pd.NA], dtype="boolean")) + + ddf = dd.from_pandas(pd.DataFrame({"A": values}), 2) + assert ddf.dtypes["A"] == pd.BooleanDtype() + + dd.utils.assert_eq( + ddf._meta_nonempty, + pd.DataFrame({"A": pd.array([True, pd.NA], dtype="boolean")}), + ) + + +def test_ops(): + s1 = pd.Series(pd.array([True, False, None] * 3, dtype="boolean")) + s2 = pd.Series(pd.array([True] * 3 + [False]
* 3 + [None] * 3, dtype="boolean")) + + ds1 = dd.from_pandas(s1, 2) + ds2 = dd.from_pandas(s2, 2) + + dd.utils.assert_eq(ds1 | ds2, s1 | s2) + dd.utils.assert_eq(ds1 & ds2, s1 & s2) + dd.utils.assert_eq(ds1 ^ ds2, s1 ^ s2) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tests/test_dataframe.py new/dask-2.10.1/dask/dataframe/tests/test_dataframe.py --- old/dask-2.9.2/dask/dataframe/tests/test_dataframe.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_dataframe.py 2020-01-30 16:12:49.000000000 +0100 @@ -2981,8 +2981,8 @@ ) ddf = dd.from_pandas(df, 2) - abc = pd.api.types.CategoricalDtype(["a", "b", "c"]) - category = pd.api.types.CategoricalDtype() + abc = pd.api.types.CategoricalDtype(["a", "b", "c"], ordered=False) + category = pd.api.types.CategoricalDtype(ordered=False) # DataFrame ddf2 = ddf.astype({"x": abc, "y": category, "z": "category", "other": "f8"}) @@ -3037,6 +3037,7 @@ assert stdout_pd == stdout_da +@pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="Changed info repr") def test_info(): from io import StringIO @@ -3071,6 +3072,7 @@ assert ddf.info(buf=None) is None +@pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="Changed info repr") def test_groupby_multilevel_info(): # GH 1844 from io import StringIO @@ -3106,6 +3108,7 @@ assert buf.getvalue() == expected +@pytest.mark.skipif(not dd._compat.PANDAS_GT_100, reason="Changed info repr") def test_categorize_info(): # assert that we can call info after categorize # workaround for: https://github.com/pydata/pandas/issues/14368 @@ -3126,9 +3129,11 @@ "<class 'dask.dataframe.core.DataFrame'>\n" "Int64Index: 4 entries, 0 to 3\n" "Data columns (total 3 columns):\n" - "x 4 non-null int64\n" - "y 4 non-null category\n" - "z 4 non-null object\n" + " # Column Non-Null Count Dtype\n" + "--- ------ -------------- -----\n" + " 0 x 4 non-null int64\n" + " 1 y 4 non-null category\n" + " 2 z 4 non-null
object\n" "dtypes: category(1), object(1), int64(1)" ) assert buf.getvalue() == expected @@ -3475,20 +3480,9 @@ assert_eq(f(ddf.A, offset), f(df.A, offset)) -xfail_hash_object = pytest.mark.xfail(PANDAS_GT_100, reason="GH-30887") - - @pytest.mark.parametrize("npartitions", [1, 4, 20]) @pytest.mark.parametrize("split_every", [2, 5]) [email protected]( - "split_out", - [ - None, - 1, - pytest.param(5, marks=xfail_hash_object), - pytest.param(20, marks=xfail_hash_object), - ], -) [email protected]("split_out", [None, 1, 5, 20]) def test_hash_split_unique(npartitions, split_every, split_out): from string import ascii_lowercase @@ -4093,9 +4087,6 @@ @pytest.mark.parametrize("map_npart", [1, 3]) @pytest.mark.parametrize("sorted_index", [False, True]) @pytest.mark.parametrize("sorted_map_index", [False, True]) [email protected]( - PANDAS_GT_100, reason="https://github.com/pandas-dev/pandas/issues/30887" -) def test_series_map(base_npart, map_npart, sorted_index, sorted_map_index): base = pd.Series( ["".join(np.random.choice(["a", "b", "c"], size=3)) for x in range(100)] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tests/test_groupby.py new/dask-2.10.1/dask/dataframe/tests/test_groupby.py --- old/dask-2.9.2/dask/dataframe/tests/test_groupby.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_groupby.py 2020-01-30 16:12:49.000000000 +0100 @@ -1953,7 +1953,7 @@ cols = 3 data = np.random.randn(rows, cols) df = pd.DataFrame(data, columns=columns) - df["key"] = np.random.randint(0, cols, size=rows) + df["key"] = [0] * 10 + [1] * 5 + [2] * 5 ddf = dd.from_pandas(df, npartitions=3) expected = df.groupby("key").cov() @@ -2329,3 +2329,24 @@ ) assert_eq(ddf_result, ddf_result_so1, check_index=False) + + [email protected]("backend", ["cudf", "pandas"]) +def test_groupby_large_ints_exception(backend): + data_source = pytest.importorskip(backend) + if backend == "cudf": + 
dask_cudf = pytest.importorskip("dask_cudf") + data_frame = dask_cudf.from_cudf + else: + data_frame = dd.from_pandas + max = np.iinfo(np.uint64).max + sqrt = max ** 0.5 + series = data_source.Series( + np.concatenate([sqrt * np.arange(5), np.arange(35)]) + ).astype("int64") + df = data_source.DataFrame({"x": series, "z": np.arange(40), "y": np.arange(40)}) + ddf = data_frame(df, npartitions=1) + assert_eq( + df.groupby("x").std(), + ddf.groupby("x").std().compute(scheduler="single-threaded"), + ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tests/test_hashing.py new/dask-2.10.1/dask/dataframe/tests/test_hashing.py --- old/dask-2.9.2/dask/dataframe/tests/test_hashing.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_hashing.py 2020-01-28 18:29:39.000000000 +0100 @@ -2,7 +2,7 @@ import pandas as pd import dask.dataframe as dd from dask.dataframe import _compat -from dask.dataframe._compat import tm, PANDAS_GT_100 +from dask.dataframe._compat import tm from pandas.util import hash_pandas_object import pytest @@ -65,24 +65,16 @@ tm.assert_series_equal(h1, h2) -xfail_hash_object = pytest.mark.xfail( - PANDAS_GT_100, reason="https://github.com/pandas-dev/pandas/issues/30887" -) - - @pytest.mark.parametrize( "obj", [ pd.Index([1, 2, 3]), - pytest.param(pd.Index([True, False, True]), marks=xfail_hash_object), + pd.Index([True, False, True]), pd.Series([1, 2, 3]), pd.Series([1.0, 1.5, 3.2]), pd.Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), pd.DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), - pytest.param( - pd.DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}, index=["a", "z", "x"]), - marks=xfail_hash_object, - ), + pd.DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}, index=["a", "z", "x"]), ], ) def test_hash_object_dispatch(obj): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/dask-2.9.2/dask/dataframe/tests/test_multi.py new/dask-2.10.1/dask/dataframe/tests/test_multi.py --- old/dask-2.9.2/dask/dataframe/tests/test_multi.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_multi.py 2020-01-28 18:29:39.000000000 +0100 @@ -6,7 +6,7 @@ import pandas as pd from dask.base import compute_as_if_collection -from dask.dataframe._compat import tm, PANDAS_GT_100 +from dask.dataframe._compat import tm from dask.dataframe.core import _Frame from dask.dataframe.methods import concat, concat_kwargs from dask.dataframe.multi import ( @@ -1944,9 +1944,6 @@ assert_eq(r1, r2) [email protected]( - PANDAS_GT_100, reason="https://github.com/pandas-dev/pandas/issues/30887" -) def test_merge_outer_empty(): # Issue #5470 bug reproducer k_clusters = 3 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tests/test_shuffle.py new/dask-2.10.1/dask/dataframe/tests/test_shuffle.py --- old/dask-2.9.2/dask/dataframe/tests/test_shuffle.py 2020-01-17 00:23:43.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tests/test_shuffle.py 2020-01-28 18:29:39.000000000 +0100 @@ -12,7 +12,7 @@ import dask import dask.dataframe as dd -from dask.dataframe._compat import tm, PANDAS_GT_100, assert_categorical_equal +from dask.dataframe._compat import tm, assert_categorical_equal from dask import delayed from dask.base import compute_as_if_collection from dask.dataframe.shuffle import ( @@ -142,9 +142,6 @@ assert len(np.unique(res)) > 1 [email protected]( - PANDAS_GT_100, reason="https://github.com/pandas-dev/pandas/issues/30887" -) def test_partitioning_index_categorical_on_values(): df = pd.DataFrame({"a": list(string.ascii_letters), "b": [1, 2, 3, 4] * 13}) df.a = df.a.astype("category") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tseries/resample.py new/dask-2.10.1/dask/dataframe/tseries/resample.py --- 
old/dask-2.9.2/dask/dataframe/tseries/resample.py 2019-12-16 22:41:56.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tseries/resample.py 2020-01-28 18:29:39.000000000 +0100 @@ -6,6 +6,7 @@ from ...base import tokenize from ...utils import derived_from from ...highlevelgraph import HighLevelGraph +from .._compat import PANDAS_GT_0240 def getnanos(rule): @@ -30,12 +31,20 @@ out = getattr(series.resample(rule, **resample_kwargs), how)( *how_args, **how_kwargs ) - return out.reindex( - pd.date_range( + if PANDAS_GT_0240: + new_index = pd.date_range( + start.tz_localize(None), + end.tz_localize(None), + freq=rule, + closed=reindex_closed, + name=out.index.name, + ).tz_localize(start.tz, nonexistent="shift_forward") + + else: + new_index = pd.date_range( start, end, freq=rule, closed=reindex_closed, name=out.index.name - ), - fill_value=fill_value, - ) + ) + return out.reindex(new_index, fill_value=fill_value) def _resample_bin_and_out_divs(divisions, rule, closed="left", label="left"): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/tseries/tests/test_resample.py new/dask-2.10.1/dask/dataframe/tseries/tests/test_resample.py --- old/dask-2.9.2/dask/dataframe/tseries/tests/test_resample.py 2019-12-16 22:41:56.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/tseries/tests/test_resample.py 2020-01-28 18:29:39.000000000 +0100 @@ -4,6 +4,7 @@ import pytest from dask.dataframe.utils import assert_eq, PANDAS_VERSION +from dask.dataframe._compat import PANDAS_GT_0240 import dask.dataframe as dd @@ -103,6 +104,22 @@ assert ddf.resample("D").mean().head().index.name == "date" +@pytest.mark.skipif(not PANDAS_GT_0240, reason="nonexistent not in 0.23 or older") +def test_series_resample_non_existent_datetime(): + index = [ + pd.Timestamp("2016-10-15 00:00:00"), + pd.Timestamp("2016-10-16 10:00:00"), + pd.Timestamp("2016-10-17 00:00:00"), + ] + df = pd.DataFrame([[1], [2], [3]], index=index) + df.index =
df.index.tz_localize("America/Sao_Paulo") + ddf = dd.from_pandas(df, npartitions=1) + result = ddf.resample("1D").mean() + expected = df.resample("1D").mean() + + assert_eq(result, expected) + + @pytest.mark.skipif(PANDAS_VERSION <= "0.23.4", reason="quantile not in 0.23") @pytest.mark.parametrize("agg", ["nunique", "mean", "count", "size", "quantile"]) def test_common_aggs(agg): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/dataframe/utils.py new/dask-2.10.1/dask/dataframe/utils.py --- old/dask-2.9.2/dask/dataframe/utils.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/dataframe/utils.py 2020-01-28 18:29:39.000000000 +0100 @@ -39,6 +39,9 @@ from ..utils import is_series_like as dask_is_series_like from ..utils import is_index_like as dask_is_index_like +# register pandas extension types +from . import _dtypes # noqa: F401 + def is_integer_na_dtype(t): dtype = getattr(t, "dtype", t) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/tests/test_base.py new/dask-2.10.1/dask/tests/test_base.py --- old/dask-2.9.2/dask/tests/test_base.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/tests/test_base.py 2020-01-28 18:29:39.000000000 +0100 @@ -285,6 +285,46 @@ tokenize(df) +@pytest.mark.skipif("not pd") +def test_tokenize_pandas_extension_array(): + from dask.dataframe._compat import PANDAS_GT_100, PANDAS_GT_0240 + + if not PANDAS_GT_0240: + pytest.skip("requires pandas>=1.0.0") + + arrays = [ + pd.array([1, 0, None], dtype="Int64"), + pd.array(["2000"], dtype="Period[D]"), + pd.array([1, 0, 0], dtype="Sparse[int]"), + pd.array([pd.Timestamp("2000")], dtype="datetime64[ns]"), + pd.array([pd.Timestamp("2000", tz="CET")], dtype="datetime64[ns, CET]"), + pd.array( + ["a", "b"], + dtype=pd.api.types.CategoricalDtype(["a", "b", "c"], ordered=False), + ), + ] + + if PANDAS_GT_100: + arrays.extend( + [ + pd.array(["a", "b", None], 
dtype="string"), + pd.array([True, False, None], dtype="boolean"), + ] + ) + + for arr in arrays: + assert tokenize(arr) == tokenize(arr) + + +@pytest.mark.skipif("not pd") +def test_tokenize_pandas_index(): + idx = pd.Index(["a", "b"]) + assert tokenize(idx) == tokenize(idx) + + idx = pd.MultiIndex.from_product([["a", "b"], [0, 1]]) + assert tokenize(idx) == tokenize(idx) + + def test_tokenize_kwargs(): assert tokenize(5, x=1) == tokenize(5, x=1) assert tokenize(5) != tokenize(5, x=1) @@ -456,6 +496,7 @@ if dataclasses is not None: t[2]["f"] = ADataClass(a=a) + t[2]["g"] = (ADataClass, a) return t diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask/tests/test_distributed.py new/dask-2.10.1/dask/tests/test_distributed.py --- old/dask-2.9.2/dask/tests/test_distributed.py 2020-01-14 20:29:53.000000000 +0100 +++ new/dask-2.10.1/dask/tests/test_distributed.py 2020-01-30 17:30:35.000000000 +0100 @@ -136,12 +136,8 @@ ], ) def test_to_hdf_scheduler_distributed(npartitions, c): - dd = pytest.importorskip("dask.dataframe") from ..dataframe.io.tests.test_hdf import test_to_hdf_schedulers - if dd._compat.PANDAS_GT_100: - pytest.skip("https://github.com/pandas-dev/pandas/issues/30962") - test_to_hdf_schedulers(None, npartitions) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask.egg-info/PKG-INFO new/dask-2.10.1/dask.egg-info/PKG-INFO --- old/dask-2.9.2/dask.egg-info/PKG-INFO 2020-01-17 00:50:12.000000000 +0100 +++ new/dask-2.10.1/dask.egg-info/PKG-INFO 2020-01-30 17:44:56.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: dask -Version: 2.9.2 +Version: 2.10.1 Summary: Parallel PyData with Task Scheduling Home-page: https://github.com/dask/dask/ Maintainer: Matthew Rocklin diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/dask.egg-info/SOURCES.txt new/dask-2.10.1/dask.egg-info/SOURCES.txt --- 
old/dask-2.9.2/dask.egg-info/SOURCES.txt 2020-01-17 00:50:12.000000000 +0100 +++ new/dask-2.10.1/dask.egg-info/SOURCES.txt 2020-01-30 17:44:56.000000000 +0100 @@ -120,6 +120,7 @@ dask/dataframe/__init__.py dask/dataframe/_accessor.py dask/dataframe/_compat.py +dask/dataframe/_dtypes.py dask/dataframe/accessor.py dask/dataframe/backends.py dask/dataframe/categorical.py @@ -162,6 +163,7 @@ dask/dataframe/tests/__init__.py dask/dataframe/tests/test_accessors.py dask/dataframe/tests/test_arithmetics_reduction.py +dask/dataframe/tests/test_boolean.py dask/dataframe/tests/test_categorical.py dask/dataframe/tests/test_dataframe.py dask/dataframe/tests/test_extensions.py @@ -321,6 +323,8 @@ docs/source/images/frame-sort.svg docs/source/images/frame.png docs/source/images/frame.svg +docs/source/images/growth_of_languages.png +docs/source/images/growth_of_libraries.png docs/source/images/inc-add.svg docs/source/images/optimize_dask1.png docs/source/images/optimize_dask1.svg diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/docs/source/changelog.rst new/dask-2.10.1/docs/source/changelog.rst --- old/dask-2.9.2/docs/source/changelog.rst 2020-01-17 00:45:59.000000000 +0100 +++ new/dask-2.10.1/docs/source/changelog.rst 2020-01-30 17:40:38.000000000 +0100 @@ -1,6 +1,26 @@ Changelog ========= +2.10.1 / 2020-01-30 +------------------- + +- Fix Pandas 1.0 version comparison (:pr:`5851`) `Tom Augspurger`_ +- Fix typo in distributed diagnostics documentation (:pr:`5841`) `Gerrit Holl`_ + + +2.10.0 / 2020-01-28 +------------------- + +- Support for pandas 1.0's new ``BooleanDtype`` and ``StringDtype`` (:pr:`5815`) `Tom Augspurger`_ +- Compatibility with pandas 1.0's API breaking changes and deprecations (:pr:`5792`) `Tom Augspurger`_ +- Fixed non-deterministic tokenization of some extension-array backed pandas objects (:pr:`5813`) `Tom Augspurger`_ +- Fixed handling of dataclass class objects in collections (:pr:`5812`) `Matteo De 
Wint`_ +- Fixed resampling with tz-aware dates when one of the endpoints fell in a non-existent time (:pr:`5807`) `dfonnegra`_ +- Delay initial Zarr dataset creation until the computation occurs (:pr:`5797`) `Chris Roat`_ +- Use parquet dataset statistics in more cases with the ``pyarrow`` engine (:pr:`5799`) `Richard J Zamora`_ +- Fixed exception in ``groupby.std()`` when some of the keys were large integers (:pr:`5737`) `H. Thomson Comer`_ + + 2.9.2 / 2020-01-16 ------------------ @@ -2851,3 +2871,7 @@ .. _`Timost`: https://github.com/Timost .. _`Maarten Breddels`: https://github.com/maartenbreddels .. _`Devin Petersohn`: https://github.com/devin-petersohn +.. _`dfonnegra`: https://github.com/dfonnegra +.. _`Chris Roat`: https://github.com/ChrisRoat +.. _`H. Thomson Comer`: https://github.com/thomcom +.. _`Gerrit Holl`: https://github.com/gerritholl diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/docs/source/develop.rst new/dask-2.10.1/docs/source/develop.rst --- old/dask-2.9.2/docs/source/develop.rst 2019-11-19 03:44:49.000000000 +0100 +++ new/dask-2.10.1/docs/source/develop.rst 2020-01-30 16:12:40.000000000 +0100 @@ -152,7 +152,7 @@ py.test dask/dataframe --verbose - py.test dask/dataframe/tests/test_dataframe_core.py::test_set_index + py.test dask/dataframe/tests/test_dataframe.py::test_rename_index Tests run automatically on the Travis.ci and Appveyor continuous testing frameworks on every push to every pull request on GitHub. 
@@ -161,6 +161,7 @@ dask/array/tests/test_*.py dask/bag/tests/test_*.py + dask/bytes/tests/test_*.py dask/dataframe/tests/test_*.py dask/diagnostics/tests/test_*.py diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/docs/source/diagnostics-distributed.rst new/dask-2.10.1/docs/source/diagnostics-distributed.rst --- old/dask-2.9.2/docs/source/diagnostics-distributed.rst 2019-12-18 23:16:42.000000000 +0100 +++ new/dask-2.10.1/docs/source/diagnostics-distributed.rst 2020-01-30 17:30:25.000000000 +0100 @@ -33,7 +33,7 @@ It is typically served at http://localhost:8787/status , but may be served elsewhere if this port is taken. The address of the dashboard will be displayed if you are in a Jupyter Notebook, -or can be queriesd from ``client.scheduler_info()['services']``. +or can be queried from ``client.scheduler_info()['services']``. There are numerous pages with information about task runtimes, communication, statistical profiling, load balancing, memory use, and much more. Binary files old/dask-2.9.2/docs/source/images/growth_of_languages.png and new/dask-2.10.1/docs/source/images/growth_of_languages.png differ Binary files old/dask-2.9.2/docs/source/images/growth_of_libraries.png and new/dask-2.10.1/docs/source/images/growth_of_libraries.png differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-2.9.2/docs/source/why.rst new/dask-2.10.1/docs/source/why.rst --- old/dask-2.9.2/docs/source/why.rst 2019-11-20 02:10:36.000000000 +0100 +++ new/dask-2.10.1/docs/source/why.rst 2020-01-28 18:29:39.000000000 +0100 @@ -11,7 +11,7 @@ Python has grown to become the dominant language both in data analytics and general programming: -.. image:: https://zgab33vy595fw5zq-zippykid.netdna-ssl.com/wp-content/uploads/2017/09/growth_major_languages-1-1024x878.png +.. 
image:: images/growth_of_languages.png :alt: Growth of major programming languages :width: 75% @@ -19,7 +19,7 @@ Scikit-Learn and by a wealth of libraries for visualization, interactive notebooks, collaboration, and so forth. -.. image:: https://zgab33vy595fw5zq-zippykid.netdna-ssl.com/wp-content/uploads/2017/09/related_tags_over_time-1-1024x1024.png +.. image:: images/growth_of_libraries.png :alt: Stack overflow traffic to various packages :width: 75%
