Hello community,

here is the log from the commit of package python-intake for openSUSE:Factory checked in at 2020-02-03 11:13:30

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-intake (Old)
 and      /work/SRC/openSUSE:Factory/.python-intake.new.26092 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-intake" Mon Feb 3 11:13:30 2020 rev:4 rq:768879 version:0.5.4 Changes: -------- --- /work/SRC/openSUSE:Factory/python-intake/python-intake.changes 2019-11-20 10:28:49.714581310 +0100 +++ /work/SRC/openSUSE:Factory/.python-intake.new.26092/python-intake.changes 2020-02-03 11:13:35.945853447 +0100 @@ -1,0 +2,23 @@ +Thu Jan 30 19:12:57 UTC 2020 - Todd R <[email protected]> + +- Update to 0.5.4 + * Allow for hvplot returning dynamic layouts + * Make sure that ttl is numeric + * add intake-nested-yaml-catalog plugin + * check_meta_flag -> verify_meta + * forgot to add pickle & json + * fix dask version constraint + * Don't replace "inteke" in remote server name + * Update intake/gui/source/defined_plots.py + * Use a generator for pagination. + * add address to log + * add server cli kwarg for address + * dask.bytes to fsspec + * don't check meta when merging dataframes + * pin fsspec >= 0.3.6 + * return storage options if given + * update open_file link + * use the given encoder if not in the list + * verify kwarg and dask version check + +------------------------------------------------------------------- Old: ---- intake-0.5.3.tar.gz New: ---- intake-0.5.4.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-intake.spec ++++++ --- /var/tmp/diff_new_pack.YyQWZC/_old 2020-02-03 11:13:37.393854178 +0100 +++ /var/tmp/diff_new_pack.YyQWZC/_new 2020-02-03 11:13:37.417854191 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-intake # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,7 +19,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} %define skip_python2 1 Name: python-intake -Version: 0.5.3 +Version: 0.5.4 Release: 0 Summary: Data loading and cataloging system License: BSD-2-Clause @@ -35,6 +35,8 @@ Requires: python-dask-array Requires: python-dask-bag >= 1.0 Requires: python-dask-dataframe +Requires: python-entrypoints +Requires: python-fsspec >= 0.3.6 Requires: python-holoviews Requires: python-hvplot Requires: python-ipywidgets >= 7.2 @@ -57,6 +59,8 @@ BuildRequires: %{python_module dask-bag >= 1.0} BuildRequires: %{python_module dask-dataframe} BuildRequires: %{python_module dask} +BuildRequires: %{python_module entrypoints} +BuildRequires: %{python_module fsspec >= 0.3.6} BuildRequires: %{python_module holoviews} BuildRequires: %{python_module hvplot} BuildRequires: %{python_module ipywidgets >= 7.2} @@ -78,6 +82,9 @@ %prep %setup -q -n intake-%{version} +sed -i -e '/^#!\//, 1d' intake/catalog/tests/test_persist.py +sed -i -e '/^#!\//, 1d' intake/container/tests/__init__.py +sed -i -e '/^#!\//, 1d' intake/container/tests/test_generics.py %build %python_build ++++++ intake-0.5.3.tar.gz -> intake-0.5.4.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/PKG-INFO new/intake-0.5.4/PKG-INFO --- old/intake-0.5.3/PKG-INFO 2019-08-07 20:46:08.000000000 +0200 +++ new/intake-0.5.4/PKG-INFO 2019-12-18 14:32:50.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: intake -Version: 0.5.3 +Version: 0.5.4 Summary: Data load and catalog system Home-page: https://github.com/intake/intake Maintainer: Martin Durant @@ -71,7 +71,7 @@ Classifier: Programming Language :: Python :: 3.7 Requires-Python: >=3.6 
Description-Content-Type: text/markdown +Provides-Extra: complete Provides-Extra: server -Provides-Extra: dataframe Provides-Extra: plot -Provides-Extra: complete +Provides-Extra: dataframe diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/auth-plugins.rst new/intake-0.5.4/docs/source/auth-plugins.rst --- old/intake-0.5.3/docs/source/auth-plugins.rst 2018-08-03 15:30:37.000000000 +0200 +++ new/intake-0.5.4/docs/source/auth-plugins.rst 2019-11-06 23:49:03.000000000 +0100 @@ -21,7 +21,7 @@ plugin would be configured this way:: auth: - class: intake.auth.secret.Secret + cls: intake.auth.secret.SecretAuth kwargs: secret: A_SECRET_HASH diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/data-packages.rst new/intake-0.5.4/docs/source/data-packages.rst --- old/intake-0.5.3/docs/source/data-packages.rst 2019-07-11 18:44:51.000000000 +0200 +++ new/intake-0.5.4/docs/source/data-packages.rst 2019-09-18 17:12:53.000000000 +0200 @@ -1,17 +1,19 @@ Making Data Packages ==================== -Combined with the `Conda Package Manger <https://conda.io/docs/>`_, Intake makes it possible to create *data packages* -which can be installed and upgraded just like software packages. This offers several advantages: +Combined with the `Conda Package Manger <https://conda.io/docs/>`_, Intake +makes it possible to create :term:`Data packages` which can be installed and upgraded just like +software packages. This offers several advantages: - * Distributing datasets becomes as easy ``conda install`` + * Distributing Catalogs and Drivers becomes as easy as ``conda install`` * Data packages can be versioned, improving reproducibility in some cases * Data packages can depend on the libraries required for reading * Data packages can be self-describing using Intake catalog files - * Applications that need certain datasets can include data packages in their dependency list + * Applications that need certain Catalogs can include data packages in their dependency list -In this tutorial, we give a walkthrough to enable you to distribute any dataset to others, so that they can access the -data using Intake without worrying about where it resides or how it should be loaded. +In this tutorial, we give a walkthrough to enable you to distribute any +Catalogs to others, so that they can access the data using Intake without worrying about where it +resides or how it should be loaded. Implementation '''''''''''''' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/glossary.rst new/intake-0.5.4/docs/source/glossary.rst --- old/intake-0.5.3/docs/source/glossary.rst 2019-03-11 16:31:56.000000000 +0100 +++ new/intake-0.5.4/docs/source/glossary.rst 2019-09-18 17:12:53.000000000 +0200 @@ -19,6 +19,10 @@ server or another third-party data service, like a SQL database. Thus, catalogs form a hierarchy: any catalog can contain other, nested catalogs. + Catalog file + A :term:`YAML` specification file which contains a list of named entries describing how to load data + sources. :doc:`catalog`. + Conda A package and environment management package for the python ecosystem, see the `conda website`_. Conda ensures dependencies and correct versions are installed for you, provides precompiled, binary-compatible software, @@ -54,6 +58,12 @@ easily as possible. 
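
Worth a short illustration of the ``auth-plugins.rst`` and ``intake/catalog/base.py`` hunks above: the server-side config key is now ``cls`` pointing at ``intake.auth.secret.SecretAuth``, and a 403 from the server is surfaced as the new ``AuthenticationFailure`` exception. A minimal client-side sketch, assuming a server is already running with that secret-auth configuration; the URL and secret value are placeholders, not part of this changeset::

    # Illustrative only: mirrors intake/catalog/tests/test_auth_integration.py.
    # "intake://localhost:5000" and the secret are placeholder values.
    from intake import Catalog
    from intake.auth.secret import SecretClientAuth
    from intake.auth.base import AuthenticationFailure  # new in 0.5.4

    auth = SecretClientAuth(secret='A_SECRET_HASH')
    try:
        cat = Catalog('intake://localhost:5000', auth=auth)
        print(list(cat))
    except AuthenticationFailure as err:
        # 0.5.4 raises this instead of a generic RemoteCatalogError on a 403
        print('authentication rejected:', err)
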
In many organisations, the appropriate job title may be Data Scientist, but research scientists and BI/analysts also fit this description. + Data packages + Data packages are standard conda packages that install an Intake catalog file into the user’s conda + environment ($CONDA_PREFIX/share/intake). A data package does not necessarily imply there are data files + inside the package. A data package could describe remote data sources (such as files in S3) and take up + very little space on disk. + Data Provider A person whose main objective is to curate data sources, get them into appropriate formats, describe the contents, and disseminate the data to those that need to use them. Such a person diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/making-plugins.rst new/intake-0.5.4/docs/source/making-plugins.rst --- old/intake-0.5.3/docs/source/making-plugins.rst 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/docs/source/making-plugins.rst 2019-09-18 17:12:53.000000000 +0200 @@ -220,16 +220,39 @@ ... entry_points={ 'intake.drivers': [ - 'some_format_name = some_package.and_maybe_a_submodule.YourDriverClass', + 'some_format_name = some_package.and_maybe_a_submodule:YourDriverClass', ... ] }, ) +.. important:: + + Some critical details of Python's entrypoints feature: + + * Note the unusual syntax of the entrypoints. Each item is given as one long + string, with the ``=`` as part of the string. Modules are separated by + ``.``, and the final object name is preceded by ``:``. + * The right hand side of the equals sign must point to where the object is + *actually defined*. If ``YourDriverClass`` is defined in + ``foo/bar.py`` and imported into ``foo/__init__.py`` you might expect + ``foo:YourDriverClass`` to work, but it does not. You must spell out + ``foo.bar:YourDriverClass``. + Entry points are a way for Python packages to advertise objects with some common interface. When Intake is imported, it discovers all packages installed in the current environment that advertise ``'intake.drivers'`` in this way. +Most packages that define intake drivers have a dependency on ``intake`` +itself, for example in order to use intake's base classes. This can create a +ciruclar dependency: importing the package imports intake, which tries +to discover and import packages that define drivers. To avoid this pitfall, +just ensure that ``intake`` is imported first thing in your package's +``__init__.py``. This ensures that the driver-discovery code runs first. Note +that you are *not* required to make your package depend on intake. The rule is +that *if* you import ``intake`` you must import it first thing. If you do not +import intake, there is no circularity. + Configuration ''''''''''''' @@ -308,12 +331,12 @@ .. _remote data: http://dask.pydata.org/en/latest/remote-data-services.html More advanced usage, where a Dask loader does not already exist, will likely rely on -`dask.bytes.open_files`_ . Use this function to produce lazy ``OpenFile`` object for local +`fsspec.open_files`_ . Use this function to produce lazy ``OpenFile`` object for local or remote data, based on a URL, which will have a protocol designation and possibly contain glob "*" characters. Additional parameters may be passed to ``open_files``, which should, by convention, be supplied by a driver argument named ``storage_options`` (a dictionary). -.. _dask.bytes.open_files: http://dask.pydata.org/en/latest/bytes.html#dask.bytes.open_files +.. 
_fsspec.open_files: https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.open_files To use an ``OpenFile`` object, make it concrete by using a context: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/plugin-directory.rst new/intake-0.5.4/docs/source/plugin-directory.rst --- old/intake-0.5.3/docs/source/plugin-directory.rst 2019-05-13 15:51:56.000000000 +0200 +++ new/intake-0.5.4/docs/source/plugin-directory.rst 2019-11-06 23:49:03.000000000 +0100 @@ -20,6 +20,7 @@ * `intake-hbase <https://github.com/intake/intake-hbase>`_: Apache HBase database (``hbase``) * `intake-iris <https://github.com/informatics-lab/intake-iris>`_ load netCDF and GRIB files with IRIS (``grib``, ``netcdf``) * `intake-mongo <https://github.com/intake/intake-mongo>`_: MongoDB noSQL query (``mongo``) +* `intake-nested-yaml-catalog <https://github.com/zillow/intake-nested-yaml-catalog/`_: Plugin supporting a single YAML hierarchical catalog to organize datasets and avoid a data swamp. (``yaml``, ``catalog``, ``nested``) * `intake-netflow <https://github.com/intake/intake-netflow>`_: Netflow packet format (``netflow``) * `intake-odbc <https://github.com/intake/intake-odbc>`_: ODBC database (``odbc``) * `intake-parquet <https://github.com/intake/intake-parquet>`_: Apache Parquet file format (``parquet``) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/quickstart.rst new/intake-0.5.4/docs/source/quickstart.rst --- old/intake-0.5.3/docs/source/quickstart.rst 2019-05-06 20:02:34.000000000 +0200 +++ new/intake-0.5.4/docs/source/quickstart.rst 2019-09-18 17:12:53.000000000 +0200 @@ -112,7 +112,8 @@ It is often useful to move the descriptions of data sources out of your code and into a specification file that can be -reused and shared with other projects and people. Intake calls this a ":term:`Catalog`", which contains +reused and shared with other projects and people. Intake calls this a +":term:`Catalog file`", which contains a list of named entries describing how to load data sources. The ``intake example`` command, above, created a catalog file with the following :term:`YAML`-syntax content: @@ -128,7 +129,7 @@ metadata: origin_url: 'https://github.com/CivilServiceUSA/us-states/blob/v1.0.0/data/states.csv' -To load a catalog from a catalog file:: +To load a :term:`Catalog` from a :term:`Catalog file`:: >>> cat = intake.open_catalog('us_states.yml') >>> list(cat) @@ -156,7 +157,7 @@ be passed are limited to the user_parameters defined and the inputs expected by the specific driver - such usage is expected only from those already familiar with the specifics of the given format. 
In the following example, the user overrides the "csv_kwargs" keyword, which is described -in the documentation for :func:`CSVSource`_ and gets passed down to the CSV reader:: +in the documentation for :func:`CSVSource <intake.source.csv.CSVSource>` and gets passed down to the CSV reader:: # pass extra kwargs understood by the csv driver >>> intake.cat.states(csv_kwargs={'header': None, 'skiprows': 1}).read().head() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/roadmap.rst new/intake-0.5.4/docs/source/roadmap.rst --- old/intake-0.5.3/docs/source/roadmap.rst 2019-05-07 19:38:38.000000000 +0200 +++ new/intake-0.5.4/docs/source/roadmap.rst 2019-09-18 17:12:53.000000000 +0200 @@ -3,9 +3,8 @@ Roadmap ======= -Some high-level work that we expect to be achieved ont he time-scale of months. This list -is not exhaustive, but -rather aims to whet the appetite for what Intake can be in the future. +Some high-level work that we expect to be achieved on the time-scale of months. This list +is not exhaustive, but rather aims to whet the appetite for what Intake can be in the future. Since Intake aims to be a community of data-oriented pythoneers, nothing written here is laid in stone, and users and devs are encouraged to make their opinions known! @@ -21,7 +20,8 @@ drivers that should be created by the Intake team, and those that might be contributed by the community. -The next type that we would specifically like to consider is machine learning model artefacts. +The next type that we would specifically like to consider is machine learning +model artifacts. Streaming Source ---------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/docs/source/tools.rst new/intake-0.5.4/docs/source/tools.rst --- old/intake-0.5.3/docs/source/tools.rst 2018-10-30 15:44:53.000000000 +0100 +++ new/intake-0.5.4/docs/source/tools.rst 2019-12-18 14:31:44.000000000 +0100 @@ -19,7 +19,7 @@ .. code-block:: yaml auth: - class: "intake.auth.base.BaseAuth" + cls: "intake.auth.base.BaseAuth" port: 5000 catalog_path: - /home/myusername/special_dir @@ -62,18 +62,31 @@ :: >>> intake-server --help - usage: intake-server [-h] [-p PORT] [--sys-exit-on-sigterm] FILE [FILE ...] + + usage: intake-server [-h] [-p PORT] [--list-entries] [--sys-exit-on-sigterm] + [--flatten] [--no-flatten] [-a ADDRESS] + FILE [FILE ...] Intake Catalog Server positional arguments: - FILE Name of catalog YAML file + FILE Name of catalog YAML file - optional arguments: - -h, --help show this help message and exit - -p PORT, --port PORT port number for server to listen on - --sys-exit-on-sigterm internal flag used during unit testing to ensure - .coverage file is written + optional arguments: + -h, --help show this help message and exit + -p PORT, --port PORT port number for server to listen on + --list-entries list catalog entries at startup + --sys-exit-on-sigterm + internal flag used during unit testing to ensure + .coverage file is written + --flatten + --no-flatten + -a ADDRESS, --address ADDRESS + address to use as a host, defaults to the address in + the configuration file, if provided otherwise localhost + usage: intake-server [-h] [-p PORT] [--list-entries] [--sys-exit-on-sigterm] + [--flatten] [--no-flatten] [-a ADDRESS] + FILE [FILE ...] 
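
The new ``-a/--address`` option shown in the help text above is exercised by the ``address_server`` fixture added in ``intake/cli/server/tests/test_server.py`` further down. A hedged sketch of the same pattern outside pytest; the port, address and catalog filename are placeholders::

    # Sketch only: start intake-server on all interfaces and connect to it,
    # mirroring the address_server fixture in the test diff below.
    import subprocess
    import time

    import requests
    from intake import open_catalog

    proc = subprocess.Popen(['intake-server', '--port', '5001',
                             '--address', '0.0.0.0', 'cat1.yaml'])
    try:
        # poll until the HTTP endpoint answers, the same way the tests do
        for _ in range(50):
            try:
                requests.get('http://0.0.0.0:5001')
                break
            except Exception:
                time.sleep(0.2)
        cat = open_catalog('intake://0.0.0.0:5001')
        print(list(cat))
    finally:
        proc.terminate()
        proc.wait()
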
To start the server with a local catalog file, use the following: @@ -268,4 +281,4 @@ CLI functions starting with ``intake cache`` and ``intake config`` are available to provide information about the system: the locations and value of configuration -parameters, and the state of cached files. \ No newline at end of file +parameters, and the state of cached files. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/_version.py new/intake-0.5.4/intake/_version.py --- old/intake-0.5.3/intake/_version.py 2019-08-07 20:46:08.000000000 +0200 +++ new/intake-0.5.4/intake/_version.py 2019-12-18 14:32:50.000000000 +0100 @@ -8,11 +8,11 @@ version_json = ''' { - "date": "2019-08-06T18:11:17-0400", + "date": "2019-12-11T16:42:49-0500", "dirty": false, "error": null, - "full-revisionid": "bf9095b761d9fddf22f12dfd1c82cd1660677b7e", - "version": "0.5.3" + "full-revisionid": "15b930492c5bedd6fd4ca43edf4b2fba701d61c0", + "version": "0.5.4" } ''' # END VERSION_JSON diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/auth/base.py new/intake-0.5.4/intake/auth/base.py --- old/intake-0.5.3/intake/auth/base.py 2019-04-08 15:20:12.000000000 +0200 +++ new/intake-0.5.4/intake/auth/base.py 2019-11-06 23:49:03.000000000 +0100 @@ -8,6 +8,10 @@ from ..utils import DictSerialiseMixin +class AuthenticationFailure(Exception): + pass + + class BaseAuth(DictSerialiseMixin): """Base class for authorization diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/catalog/base.py new/intake-0.5.4/intake/catalog/base.py --- old/intake-0.5.3/intake/catalog/base.py 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/intake/catalog/base.py 2019-11-06 23:49:03.000000000 +0100 @@ -18,7 +18,7 @@ import msgpack -from ..auth.base import BaseClientAuth +from ..auth.base import BaseClientAuth, AuthenticationFailure from .remote import RemoteCatalogEntry from .utils import flatten, reload_on_change, RemoteCatalogError from ..source.base import DataSource @@ -544,7 +544,7 @@ http_args = copy.deepcopy(http_args) secure = http_args.pop('ssl', False) scheme = 'https' if secure else 'http' - url = url.replace('intake', scheme) + url = url.replace('intake', scheme, 1) if not url.endswith('/'): url = url + '/' self.url = url @@ -690,9 +690,15 @@ response = requests.get(self.info_url, **http_args) try: response.raise_for_status() + error = False except requests.HTTPError as err: - raise RemoteCatalogError( - "Failed to fetch metadata.") from err + if '403' in err.args[0]: + error = "Your current level of authentication does not have access" + else: + raise RemoteCatalogError( + "Failed to fetch metadata.") from err + if error: + raise AuthenticationFailure(error) info = msgpack.unpackb(response.content, **unpack_kwargs) self.metadata = info['metadata'] # The intake server now always provides a length, but the server may be diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/catalog/tests/test_auth_integration.py new/intake-0.5.4/intake/catalog/tests/test_auth_integration.py --- old/intake-0.5.3/intake/catalog/tests/test_auth_integration.py 2019-03-05 14:58:22.000000000 +0100 +++ new/intake-0.5.4/intake/catalog/tests/test_auth_integration.py 2019-11-06 23:49:03.000000000 +0100 @@ -16,6 +16,7 @@ from intake import Catalog from intake.auth.secret import SecretClientAuth +from intake.auth.base import 
AuthenticationFailure TMP_DIR = tempfile.mkdtemp() CONF_DIR = os.path.join(TMP_DIR, 'conf') @@ -80,5 +81,5 @@ def test_secret_auth_fail(intake_server_with_auth): auth = SecretClientAuth(secret='test_wrong_secret') - with pytest.raises(Exception): + with pytest.raises(AuthenticationFailure): Catalog(intake_server_with_auth, auth=auth) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/cli/server/__main__.py new/intake-0.5.4/intake/cli/server/__main__.py --- old/intake-0.5.3/intake/cli/server/__main__.py 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/intake/cli/server/__main__.py 2019-12-18 14:31:44.000000000 +0100 @@ -39,6 +39,10 @@ help='Name of catalog YAML file') parser.add_argument('--flatten', dest='flatten', action='store_true') parser.add_argument('--no-flatten', dest='flatten', action='store_false') + parser.add_argument('-a', '--address', type=str, + default=conf.get('address', 'localhost'), + help='address to use as a host, defaults to the address ' + 'in the configuration file, if provided otherwise localhost') parser.set_defaults(flatten=True) args = parser.parse_args(argv[1:]) @@ -60,13 +64,13 @@ # This is not a good idea if the Catalog is huge. logger.info('Entries:' + ','.join(list(catalog))) - logger.info('Listening on port %d' % args.port) + logger.info('Listening on %s:%d' % (args.address, args.port)) server = IntakeServer(catalog) app = server.make_app() server.start_periodic_functions(close_idle_after=3600.0) - app.listen(args.port) + app.listen(args.port, address=args.address) try: tornado.ioloop.IOLoop.current().start() except KeyboardInterrupt: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/cli/server/server.py new/intake-0.5.4/intake/cli/server/server.py --- old/intake-0.5.3/intake/cli/server/server.py 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/intake/cli/server/server.py 2019-09-18 17:12:53.000000000 +0200 @@ -105,7 +105,7 @@ else: start = int(page_offset) stop = int(page_offset) + int(page_size) - page = itertools.islice(cat.walk(depth=1).items(), start, stop) + page = itertools.islice(cat.items(), start, stop) for name, source in page: if self.auth.allow_access(head, source, self.catalog): info = source.describe().copy() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/cli/server/tests/test_server.py new/intake-0.5.4/intake/cli/server/tests/test_server.py --- old/intake-0.5.3/intake/cli/server/tests/test_server.py 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/intake/cli/server/tests/test_server.py 2019-12-18 14:31:44.000000000 +0100 @@ -238,19 +238,68 @@ while True: try: requests.get('http://localhost:5000') - try: - yield 'intake://localhost:5000' - finally: - P.terminate() - P.wait() - shutil.rmtree(tmpdir) + yield 'intake://localhost:5000' break except: time.sleep(0.2) - assert time.time() - t < 10 + if time.time() - t > 10: + break + P.terminate() + P.wait() + shutil.rmtree(tmpdir) def test_flatten_flag(multi_server): cat = open_catalog(multi_server) assert list(cat) == ['cat1', 'cat2'] assert 'use_example1' in cat.cat1() + + [email protected]() +def port_server(tmpdir): + fn1 = make_path_posix(os.path.join(tmpdir, 'cat1.yaml')) + shutil.copyfile(catalog_file, fn1) + P = subprocess.Popen(['intake-server','--port', '5001', fn1]) + t = time.time() + while True: + try: + requests.get('http://localhost:5001') + yield 
'intake://localhost:5001' + break + except: + time.sleep(0.2) + if time.time() - t > 10: + break + P.terminate() + P.wait() + shutil.rmtree(tmpdir) + + +def test_port_flag(port_server): + cat = open_catalog(port_server) + assert 'use_example1' in list(cat) + + [email protected]() +def address_server(tmpdir): + fn1 = make_path_posix(os.path.join(tmpdir, 'cat1.yaml')) + shutil.copyfile(catalog_file, fn1) + P = subprocess.Popen(['intake-server','--port', '5001', '--address', '0.0.0.0', fn1]) + t = time.time() + while True: + try: + requests.get('http://0.0.0.0:5001') + yield 'intake://0.0.0.0:5001' + break + except: + time.sleep(0.2) + if time.time() - t > 10: + break + P.terminate() + P.wait() + shutil.rmtree(tmpdir) + + +def test_address_flag(address_server): + cat = open_catalog(address_server) + assert 'use_example1' in list(cat) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/container/dataframe.py new/intake-0.5.4/intake/container/dataframe.py --- old/intake-0.5.3/intake/container/dataframe.py 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/intake/container/dataframe.py 2019-11-06 23:49:03.000000000 +0100 @@ -4,6 +4,8 @@ # # The full license is in the LICENSE file, distributed with this software. #----------------------------------------------------------------------------- +from distutils.version import LooseVersion + from intake.source.base import Schema, DataSource from .base import RemoteSource, get_partition @@ -21,6 +23,7 @@ self.shape = tuple(kwargs['shape']) self.metadata = kwargs['metadata'] self.dtype = kwargs['dtype'] + self.verify = kwargs.get('verify', False) self._schema = Schema(npartitions=self.npartitions, extra_metadata=self.metadata, dtype=self.dtype, @@ -36,7 +39,10 @@ self.url, self.headers, self._source_id, self.container, i ) for i in range(self.npartitions)] - self.dataframe = dd.from_delayed(self.parts) + if LooseVersion(dask.__version__) < LooseVersion("2.5.0"): + self.dataframe = dd.from_delayed(self.parts) + else: + self.dataframe = dd.from_delayed(self.parts, verify_meta=self.verify) return self._schema def _get_partition(self, i): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/container/semistructured.py new/intake-0.5.4/intake/container/semistructured.py --- old/intake-0.5.3/intake/container/semistructured.py 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/intake/container/semistructured.py 2019-11-06 23:49:03.000000000 +0100 @@ -70,12 +70,17 @@ return RemoteSequenceSource._data_to_source(b, path, encoder, **kwargs) @staticmethod - def _data_to_source(b, path, encoder=None, **kwargs): + def _data_to_source(b, path, encoder=None, storage_options=None, **kwargs): import dask.bag as db import posixpath from fsspec import open_files import dask + import pickle + import json from intake.source.textfiles import TextFilesSource + encoder = {None: str, 'str': str, 'json': json.dumps, + 'pickle': pickle.dumps}.get(encoder, encoder) + if not hasattr(b, 'to_textfiles'): try: b = db.from_sequence(b, npartitions=1) @@ -83,12 +88,12 @@ raise NotImplementedError files = open_files(posixpath.join(path, 'part.*'), mode='wt', - num=b.npartitions) + num=b.npartitions, **(storage_options or {})) dwrite = dask.delayed(write_file) out = [dwrite(part, f, encoder) for part, f in zip(b.to_delayed(), files)] dask.compute(out) - s = TextFilesSource(posixpath.join(path, 'part.*')) + s = TextFilesSource(posixpath.join(path, 'part.*'), 
storage_options=storage_options) return s diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/container/tests/test_persist.py new/intake-0.5.4/intake/container/tests/test_persist.py --- old/intake-0.5.3/intake/container/tests/test_persist.py 2019-04-01 16:53:29.000000000 +0200 +++ new/intake-0.5.4/intake/container/tests/test_persist.py 2019-12-18 14:31:44.000000000 +0100 @@ -44,6 +44,12 @@ assert s3 == s +def test_persist_with_nonnumeric_ttl_raises_error(temp_cache): + s = TextFilesSource("*.py") + with pytest.raises(ValueError, match="User-provided ttl was a string"): + s.persist(ttl='a string') + + class DummyDataframe(DataSource): name = 'dummy' container = 'dataframe' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/gui/source/defined_plots.py new/intake-0.5.4/intake/gui/source/defined_plots.py --- old/intake-0.5.3/intake/gui/source/defined_plots.py 2019-08-07 20:45:25.000000000 +0200 +++ new/intake-0.5.4/intake/gui/source/defined_plots.py 2019-12-18 14:31:44.000000000 +0100 @@ -54,7 +54,7 @@ self.select = pn.widgets.Select(options=self.options, height=30, align='center', width=200) self.desc = pn.pane.Str() - self.pane = pn.pane.HoloViews(self._plot_object(self.selected)) + self.pane = pn.Column(self._plot_object(self.selected)) self.show_desc = pn.widgets.Checkbox(value=False, width_policy='min', align='center') @@ -122,7 +122,7 @@ if event.name == 'value': if self.show_desc.value: self.desc.object = self._desc_contents(event.new) - self.pane.object = self._plot_object(event.new) + self.pane[0] = pn.panel(self._plot_object(event.new)) if event.name == 'options': self.instructions.object = self.instructions_contents diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake/gui/source/tests/test_source_view.py new/intake-0.5.4/intake/gui/source/tests/test_source_view.py --- old/intake-0.5.3/intake/gui/source/tests/test_source_view.py 2019-08-07 20:45:25.000000000 +0200 +++ new/intake-0.5.4/intake/gui/source/tests/test_source_view.py 2019-12-18 14:31:44.000000000 +0100 @@ -84,9 +84,9 @@ if visible: assert plots.instructions.object == plots.instructions_contents assert plots.desc.object is None - assert plots.pane.object is None + assert len(plots.pane)<2 assert len(plots.children) == 3 - assert isinstance(plots.children[-1], pn.pane.HoloViews) + assert isinstance(plots.children[-1], pn.Column) assert plots.panel.objects == plots.children assert len(plots.watchers) == 2 else: @@ -108,9 +108,9 @@ "x: Year") else: assert plots.desc.object == None - assert plots.pane.object is not None + assert plots.pane.objects is not None assert len(plots.children) == 3 - assert isinstance(plots.children[-1], pn.pane.HoloViews) + assert isinstance(plots.children[-1], pn.Column) assert plots.panel.objects == plots.children assert len(plots.watchers) == 2 else: @@ -183,6 +183,6 @@ defined_plots.selected = 'violin_example' assert defined_plots.desc.object.startswith("kind: violin") assert len(defined_plots.children) == 2 - assert isinstance(defined_plots.children[1], pn.pane.HoloViews) - assert str(defined_plots.children[1].object) == str(defined_plots.pane.object) + assert isinstance(defined_plots.children[1], pn.Column) + assert str(defined_plots.children[1].objects) == str(defined_plots.pane.objects) assert defined_plots.panel.objects == defined_plots.children diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/intake-0.5.3/intake/source/base.py new/intake-0.5.4/intake/source/base.py --- old/intake-0.5.3/intake/source/base.py 2019-08-01 21:01:24.000000000 +0200 +++ new/intake-0.5.4/intake/source/base.py 2019-12-18 14:31:44.000000000 +0100 @@ -259,26 +259,32 @@ return self.plot def persist(self, ttl=None, **kwargs): - """Save data from this source to local persistent storage""" + """Save data from this source to local persistent storage + + Parameters + ---------- + ttl: numeric, optional + Time to live in seconds. If provided, the original source will + be accessed and a new persisted version written transparently + when more than ``ttl`` seconds have passed since the old persisted + version was written. + kargs: passed to the _persist method on the base container. + """ from ..container import container_map from ..container.persist import PersistStore import time if 'original_tok' in self.metadata: raise ValueError('Cannot persist a source taken from the persist ' 'store') - method = container_map[self.container]._persist + if ttl is not None and not isinstance(ttl, (int, float)): + raise ValueError('Cannot persist using a time to live that is ' + f'non-numeric. User-provided ttl was {ttl}') store = PersistStore() - out = method(self, path=store.getdir(self), **kwargs) - out.description = self.description - metadata = {'timestamp': time.time(), - 'original_metadata': self.metadata, - 'original_source': self.__getstate__(), - 'original_name': self.name, - 'original_tok': self._tok, - 'persist_kwargs': kwargs, - 'ttl': ttl, - 'cat': {} if self.cat is None else self.cat.__getstate__()} - out.metadata = metadata + out = self._export(store.getdir(self), **kwargs) + out.metadata.update({ + 'ttl': ttl, + 'cat': {} if self.cat is None else self.cat.__getstate__() + }) out.name = self.name store.add(self._tok, out) return out @@ -293,6 +299,9 @@ add it to a catalog (``catalog.add(source)``) or get its YAML representation (``.yaml()``). 
""" + return self._export(path, **kwargs) + + def _export(self, path, **kwargs): from ..container import container_map import time method = container_map[self.container]._persist diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake.egg-info/PKG-INFO new/intake-0.5.4/intake.egg-info/PKG-INFO --- old/intake-0.5.3/intake.egg-info/PKG-INFO 2019-08-07 20:46:07.000000000 +0200 +++ new/intake-0.5.4/intake.egg-info/PKG-INFO 2019-12-18 14:32:50.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: intake -Version: 0.5.3 +Version: 0.5.4 Summary: Data load and catalog system Home-page: https://github.com/intake/intake Maintainer: Martin Durant @@ -71,7 +71,7 @@ Classifier: Programming Language :: Python :: 3.7 Requires-Python: >=3.6 Description-Content-Type: text/markdown +Provides-Extra: complete Provides-Extra: server -Provides-Extra: dataframe Provides-Extra: plot -Provides-Extra: complete +Provides-Extra: dataframe diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/intake.egg-info/requires.txt new/intake-0.5.4/intake.egg-info/requires.txt --- old/intake-0.5.3/intake.egg-info/requires.txt 2019-08-07 20:46:07.000000000 +0200 +++ new/intake-0.5.4/intake.egg-info/requires.txt 2019-12-18 14:32:50.000000000 +0100 @@ -6,7 +6,7 @@ pyyaml requests msgpack -fsspec +fsspec>=0.3.6 [complete] dask[dataframe] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/intake-0.5.3/requirements.txt new/intake-0.5.4/requirements.txt --- old/intake-0.5.3/requirements.txt 2019-07-29 22:21:20.000000000 +0200 +++ new/intake-0.5.4/requirements.txt 2019-09-18 17:12:53.000000000 +0200 @@ -7,4 +7,4 @@ pyyaml requests msgpack -fsspec +fsspec >=0.3.6

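Second, the ``making-plugins.rst`` hunk spells out the corrected entry-point syntax (dots between modules, a colon before the class, pointing at the module where the class is actually defined) and advises importing ``intake`` first in your package's ``__init__.py`` to avoid a circular import. A minimal, hypothetical ``setup.py`` fragment using the placeholder names from that doc::

    # Hypothetical third-party driver package, registering a driver the way
    # docs/source/making-plugins.rst now describes. All names are the doc's
    # own placeholders (some_package, YourDriverClass, some_format_name).
    from setuptools import setup, find_packages

    setup(
        name='some_package',
        version='0.1.0',
        packages=find_packages(),
        entry_points={
            'intake.drivers': [
                # module path with dots, class introduced by ':'
                'some_format_name = some_package.and_maybe_a_submodule:YourDriverClass',
            ]
        },
    )

If ``some_package`` itself imports ``intake`` (for the base classes, say), the same hunk recommends putting ``import intake`` at the very top of ``some_package/__init__.py`` so that driver discovery runs before anything else.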