Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-gcsfs for openSUSE:Factory checked in at 2022-10-17 14:57:25 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-gcsfs (Old) and /work/SRC/openSUSE:Factory/.python-gcsfs.new.2275 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-gcsfs" Mon Oct 17 14:57:25 2022 rev:12 rq:1009270 version:2022.8.2 Changes: -------- --- /work/SRC/openSUSE:Factory/python-gcsfs/python-gcsfs.changes 2022-07-04 11:32:50.764020334 +0200 +++ /work/SRC/openSUSE:Factory/.python-gcsfs.new.2275/python-gcsfs.changes 2022-10-17 14:57:28.806067398 +0200 @@ -1,0 +2,6 @@ +Sun Oct 9 09:33:42 UTC 2022 - Ben Greiner <c...@bnavigator.de> + +- Update to 2022.8.2 + * Try cloud auth by default (#479) + +------------------------------------------------------------------- Old: ---- gcsfs-2022.5.0-gh.tar.gz New: ---- gcsfs-2022.8.2-gh.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-gcsfs.spec ++++++ --- /var/tmp/diff_new_pack.QTaMbq/_old 2022-10-17 14:57:29.390068519 +0200 +++ /var/tmp/diff_new_pack.QTaMbq/_new 2022-10-17 14:57:29.394068527 +0200 @@ -16,10 +16,8 @@ # -%{?!python_module:%define python_module() python3-%{**}} -%define skip_python2 1 Name: python-gcsfs -Version: 2022.5.0 +Version: 2022.8.2 Release: 0 Summary: Filesystem interface over GCS License: BSD-3-Clause @@ -27,6 +25,7 @@ URL: https://github.com/fsspec/gcsfs # Use the GitHub tarball for test data Source: https://github.com/fsspec/gcsfs/archive/refs/tags/%{version}.tar.gz#/gcsfs-%{version}-gh.tar.gz +BuildRequires: %{python_module base >= 3.7} BuildRequires: %{python_module setuptools} BuildRequires: fdupes BuildRequires: python-rpm-macros ++++++ gcsfs-2022.5.0-gh.tar.gz -> gcsfs-2022.8.2-gh.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/.github/workflows/ci.yml new/gcsfs-2022.8.2/.github/workflows/ci.yml --- old/gcsfs-2022.5.0/.github/workflows/ci.yml 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/.github/workflows/ci.yml 2022-09-01 03:12:31.000000000 +0200 @@ -26,7 +26,7 @@ - name: Install dependencies shell: bash -l {0} run: | - conda install -c conda-forge pytest ujson requests decorator google-auth vcrpy aiohttp google-auth-oauthlib flake8 black google-cloud-core google-api-core google-api-python-client -y + conda install -c conda-forge pytest ujson requests decorator google-auth aiohttp google-auth-oauthlib flake8 black google-cloud-core google-api-core google-api-python-client -y pip install git+https://github.com/fsspec/filesystem_spec --no-deps conda list conda --version diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/docs/source/changelog.rst new/gcsfs-2022.8.2/docs/source/changelog.rst --- old/gcsfs-2022.5.0/docs/source/changelog.rst 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/docs/source/changelog.rst 2022-09-01 03:12:31.000000000 +0200 @@ -1,6 +1,16 @@ Changelog ========= +2022.8.1 +-------- + +* don't install prerelease aiohttp (#490) + +2022.7.1 +-------- + +* Try cloud auth by default (#479) + 2022.5.0 -------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/docs/source/index.rst new/gcsfs-2022.8.2/docs/source/index.rst --- old/gcsfs-2022.5.0/docs/source/index.rst 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/docs/source/index.rst 2022-09-01 03:12:31.000000000 +0200 @@ -163,6 +163,22 @@ works, but you might find the implementation interesting. +Proxy +----- + +``gcsfs`` uses ``aiohttp`` for calls to the storage api, which by default +ignores ``HTTP_PROXY/HTTPS_PROXY`` environment variables. To read +proxy settings from the environment provide ``session_kwargs`` as follows: + +.. code-block:: python + + fs = GCSFileSystem(project='my-google-project', session_kwargs={'trust_env': True}) + +For further reference check `aiohttp proxy support`_. + +.. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html?highlight=proxy#proxy-support + + Contents ======== diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/gcsfs/_version.py new/gcsfs-2022.8.2/gcsfs/_version.py --- old/gcsfs-2022.5.0/gcsfs/_version.py 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/gcsfs/_version.py 2022-09-01 03:12:31.000000000 +0200 @@ -22,9 +22,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = "2022.5.0" - git_full = "58323d27ef7bdae8d81201b86d8a196d0a9632c8" - git_date = "2022-05-20 10:37:06 -0400" + git_refnames = "2022.8.2" + git_full = "7108ade1639af9616843fa906a3e7060bf95e37e" + git_date = "2022-08-31 21:12:31 -0400" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/gcsfs/core.py new/gcsfs-2022.8.2/gcsfs/core.py --- old/gcsfs-2022.5.0/gcsfs/core.py 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/gcsfs/core.py 2022-09-01 03:12:31.000000000 +0200 @@ -642,7 +642,9 @@ if "/" in path and create_parents and await self._exists(bucket): # nothing to do return - if "/" in path and not create_parents and not await self._exists(bucket): + if "/" in path and not create_parents: + if await self._exists(bucket): + return raise FileNotFoundError(bucket) json_data = {"name": bucket} @@ -798,6 +800,9 @@ ): """Set/delete/add writable metadata attributes + Note: uses PATCH method (update), leaving unedited keys alone. + fake-gcs-server:latest does not seem to support this. + Parameters --------- content_type: str @@ -843,7 +848,6 @@ json=i_json, json_out=True, ) - (await self._info(path))["metadata"] = o_json.get("metadata", {}) return o_json.get("metadata", {}) setxattrs = sync_wrapper(_setxattrs) @@ -1082,54 +1086,55 @@ async def _find(self, path, withdirs=False, detail=False, prefix="", **kwargs): path = self._strip_protocol(path) bucket, key = self.split_path(path) - out, _ = await self._do_list_objects( - path, - delimiter=None, - prefix=prefix, - ) - if not prefix and not out and key: - try: - out = [ - await self._get_object( - path, - ) - ] - except FileNotFoundError: - out = [] - dirs = [] - sdirs = set() + + if prefix: + _path = "" if not key else key.rstrip("/") + "/" + _prefix = f"{_path}{prefix}" + else: + _prefix = key + + objects, _ = await self._do_list_objects(bucket, delimiter="", prefix=_prefix) + + dirs = {} cache_entries = {} - for o in out: - par = o["name"] - while par: - par = self._parent(par) - if par not in sdirs: - if len(par) < len(path): - break - sdirs.add(par) - dirs.append( - { - "Key": self.split_path(par)[1], - "Size": 0, - "name": par, - "StorageClass": "DIRECTORY", - "type": "directory", - "size": 0, - } - ) - # Don't cache "folder-like" objects (ex: "Create Folder" in GCS console) to prevent - # masking subfiles in subsequent requests. - if not o["name"].endswith("/"): - cache_entries.setdefault(par, []).append(o) + + for obj in objects: + parent = self._parent(obj["name"]) + previous = obj + + while parent: + dir_key = self.split_path(parent)[1] + if not dir_key: + break + + dirs[parent] = { + "Key": dir_key, + "Size": 0, + "name": parent, + "StorageClass": "DIRECTORY", + "type": "directory", + "size": 0, + } + + if len(parent) < len(path): + # don't go above the requested level + break + + cache_entries.setdefault(parent, []).append(previous) + + previous = dirs[parent] + parent = self._parent(parent) + if not prefix: self.dircache.update(cache_entries) if withdirs: - out = sorted(out + dirs, key=lambda x: x["name"]) + objects = sorted(objects + list(dirs.values()), key=lambda x: x["name"]) if detail: - return {o["name"]: o for o in out} - return [o["name"] for o in out] + return {o["name"]: o for o in objects} + + return [o["name"] for o in objects] @retry_request(retries=retries) async def _get_file_request( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/gcsfs/credentials.py new/gcsfs-2022.8.2/gcsfs/credentials.py --- old/gcsfs-2022.5.0/gcsfs/credentials.py 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/gcsfs/credentials.py 2022-09-01 03:12:31.000000000 +0200 @@ -93,7 +93,14 @@ def _connect_cloud(self): self.credentials = gauth.compute_engine.Credentials() + if not self.credentials.valid: + raise ValueError("Invalid gcloud credentials") + def _connect_cache(self): + + if len(self.tokens) == 0: + raise ValueError("No cached tokens") + project, access = self.project, self.access if (project, access) in self.tokens: credentials = self.tokens[(project, access)] @@ -220,12 +227,15 @@ self.connect(method=meth) logger.debug("Connected with method %s", meth) break - except google.auth.exceptions.GoogleAuthError as e: + except (google.auth.exceptions.GoogleAuthError, ValueError) as e: # GoogleAuthError is the base class for all authentication # errors logger.debug( 'Connection with method "%s" failed' % meth, exc_info=e ) + # Reset credentials if they were set but the authentication failed + # (reverts to 'anon' behavior) + self.credentials = None else: # Since the 'anon' connection method should always succeed, # getting here means something has gone terribly wrong. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/gcsfs/tests/test_core.py new/gcsfs-2022.8.2/gcsfs/tests/test_core.py --- old/gcsfs-2022.5.0/gcsfs/tests/test_core.py 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/gcsfs/tests/test_core.py 2022-09-01 03:12:31.000000000 +0200 @@ -15,11 +15,12 @@ from gcsfs.tests.settings import TEST_BUCKET, TEST_PROJECT, TEST_REQUESTER_PAYS_BUCKET from gcsfs.tests.conftest import ( - files, - csv_files, - text_files, a, + allfiles, b, + csv_files, + files, + text_files, ) from gcsfs.tests.utils import tempdir, tmpfile from gcsfs.core import GCSFileSystem, quote_plus @@ -790,6 +791,9 @@ def test_attrs(gcs): + if not gcs.on_google: + # https://github.com/fsspec/gcsfs/pull/479 + pytest.skip("fake-gcs-server:latest only supports PUT for metadata, not PATCH") gcs.touch(a) assert "metadata" not in gcs.info(a) with pytest.raises(KeyError): @@ -979,26 +983,42 @@ gcs.dircache[f"{TEST_BUCKET}/a"] -def test_find_with_prefix_partial_cache(gcs): +@pytest.mark.parametrize("with_cache", (False, True)) +def test_find_with_prefix_partial_cache(gcs, with_cache): base_dir = f"{TEST_BUCKET}/test_find_with_prefix" gcs.touch(base_dir + "/test_1") gcs.touch(base_dir + "/test_2") - for with_cache in (True, False): - # Test once with cached, and once with no cache - gcs.invalidate_cache() - if with_cache: - gcs.ls(base_dir) - precache = dict(gcs.dircache) - assert gcs.find(base_dir, prefix="non_existent_") == [] - assert gcs.find(base_dir, prefix="test_") == [ - base_dir + "/test_1", - base_dir + "/test_2", - ] - assert dict(gcs.dircache) == precache # find qwith prefix shouldn't touch cache - assert gcs.find(base_dir + "/test_1") == [base_dir + "/test_1"] - assert gcs.find(base_dir + "/non_existent") == [] - assert gcs.find(base_dir + "/non_existent", prefix="more_non_existent") == [] + gcs.invalidate_cache() + if with_cache: + gcs.ls(base_dir) + precache = dict(gcs.dircache) + assert gcs.find(base_dir, prefix="non_existent_") == [] + assert gcs.find(base_dir, prefix="test_") == [ + base_dir + "/test_1", + base_dir + "/test_2", + ] + assert dict(gcs.dircache) == precache # find qwith prefix shouldn't touch cache + assert gcs.find(base_dir + "/test_1") == [base_dir + "/test_1"] + assert gcs.find(base_dir + "/non_existent") == [] + assert gcs.find(base_dir + "/non_existent", prefix="more_non_existent") == [] + + +def test_find_dircache(gcs): + """Running `ls` after find should not corrupt the dir cache""" + assert set(gcs.find(TEST_BUCKET)) == {f"{TEST_BUCKET}/{path}" for path in allfiles} + assert set(gcs.ls(TEST_BUCKET)) == { + f"{TEST_BUCKET}/test", + f"{TEST_BUCKET}/nested", + f"{TEST_BUCKET}/2014-01-01.csv", + f"{TEST_BUCKET}/2014-01-02.csv", + f"{TEST_BUCKET}/2014-01-03.csv", + } + assert set(gcs.ls(f"{TEST_BUCKET}/nested")) == { + f"{TEST_BUCKET}/nested/file1", + f"{TEST_BUCKET}/nested/file2", + f"{TEST_BUCKET}/nested/nested2", + } def test_percent_file_name(gcs): @@ -1070,12 +1090,15 @@ def test_mkdir_with_path(gcs): + with pytest.raises(FileNotFoundError): - gcs.mkdir("new/path", create_parents=False) - assert not gcs.exists("new") - gcs.mkdir("new/path", create_parents=True) - assert gcs.exists("new") + gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=False) + assert not gcs.exists(f"{TEST_BUCKET + 'new'}") + gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=True) + assert gcs.exists(f"{TEST_BUCKET + 'new'}") # these lines do nothing, but should not fail - gcs.mkdir("new/path", create_parents=False) - gcs.mkdir("new/path", create_parents=True) + gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=False) + gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=True) + + gcs.rm(f"{TEST_BUCKET + 'new'}", recursive=True) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.5.0/requirements.txt new/gcsfs-2022.8.2/requirements.txt --- old/gcsfs-2022.5.0/requirements.txt 2022-05-20 16:37:06.000000000 +0200 +++ new/gcsfs-2022.8.2/requirements.txt 2022-09-01 03:12:31.000000000 +0200 @@ -3,5 +3,5 @@ google-cloud-storage requests decorator>4.1.2 -fsspec==2022.5.0 -aiohttp<4 +fsspec==2022.8.2 +aiohttp!=4.0.0a0, !=4.0.0a1