Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-gcsfs for openSUSE:Factory checked in at 2022-11-01 13:43:28
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-gcsfs (Old)
 and /work/SRC/openSUSE:Factory/.python-gcsfs.new.2275 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-gcsfs" Tue Nov 1 13:43:28 2022 rev:13 rq:1032641 version:2022.10.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-gcsfs/python-gcsfs.changes 2022-10-17 14:57:28.806067398 +0200 +++ /work/SRC/openSUSE:Factory/.python-gcsfs.new.2275/python-gcsfs.changes 2022-11-01 13:43:36.220226583 +0100 @@ -1,0 +2,6 @@ +Mon Oct 31 23:26:44 UTC 2022 - Matej Cepl <mc...@suse.com> + +- Update to 2022.10.0: + - bump fsspec to 2022.10.0 + +------------------------------------------------------------------- Old: ---- gcsfs-2022.8.2-gh.tar.gz New: ---- gcsfs-2022.10.0-gh.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-gcsfs.spec ++++++ --- /var/tmp/diff_new_pack.1FA9NZ/_old 2022-11-01 13:43:36.684229051 +0100 +++ /var/tmp/diff_new_pack.1FA9NZ/_new 2022-11-01 13:43:36.688229073 +0100 @@ -17,7 +17,7 @@ Name: python-gcsfs -Version: 2022.8.2 +Version: 2022.10.0 Release: 0 Summary: Filesystem interface over GCS License: BSD-3-Clause ++++++ gcsfs-2022.8.2-gh.tar.gz -> gcsfs-2022.10.0-gh.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/.github/workflows/ci.yml new/gcsfs-2022.10.0/.github/workflows/ci.yml --- old/gcsfs-2022.8.2/.github/workflows/ci.yml 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/.github/workflows/ci.yml 2022-10-19 18:54:45.000000000 +0200 @@ -1,6 +1,6 @@ name: CI -on: [push, pull_request] +on: [push, pull_request, workflow_dispatch] jobs: test: @@ -26,7 +26,7 @@ - name: Install dependencies shell: bash -l {0} run: | - conda install -c conda-forge pytest ujson requests decorator google-auth aiohttp google-auth-oauthlib flake8 black google-cloud-core google-api-core google-api-python-client -y + conda install -c conda-forge pytest ujson requests decorator google-auth aiohttp google-auth-oauthlib google-cloud-core google-api-core google-api-python-client -y pip install 
git+https://github.com/fsspec/filesystem_spec --no-deps conda list conda --version @@ -41,8 +41,10 @@ export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcsfs/tests/fake-secret.json py.test -vv gcsfs - - name: Run pre-commit hooks - shell: bash -l {0} - run: | - pip install pre-commit - pre-commit run --all-file + lint: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: pre-commit/action@v2.0.0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/docs/source/changelog.rst new/gcsfs-2022.10.0/docs/source/changelog.rst --- old/gcsfs-2022.8.2/docs/source/changelog.rst 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/docs/source/changelog.rst 2022-10-19 18:54:45.000000000 +0200 @@ -1,6 +1,11 @@ Changelog ========= +2022.10.0 +--------- + +* bump fsspec to 2022.10.0 (#503) + 2022.8.1 -------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/gcsfs/_version.py new/gcsfs-2022.10.0/gcsfs/_version.py --- old/gcsfs-2022.8.2/gcsfs/_version.py 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/gcsfs/_version.py 2022-10-19 18:54:45.000000000 +0200 @@ -22,9 +22,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
- git_refnames = "2022.8.2" - git_full = "7108ade1639af9616843fa906a3e7060bf95e37e" - git_date = "2022-08-31 21:12:31 -0400" + git_refnames = "2022.10.0" + git_full = "1d34b2ef2305dd4f328e3cac527a437582226e12" + git_date = "2022-10-19 12:54:45 -0400" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/gcsfs/core.py new/gcsfs-2022.10.0/gcsfs/core.py --- old/gcsfs-2022.8.2/gcsfs/core.py 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/gcsfs/core.py 2022-10-19 18:54:45.000000000 +0200 @@ -22,6 +22,7 @@ from .checkers import get_consistency_checker from .credentials import GoogleCredentials from . import __version__ as version +from urllib.parse import quote as quote_urllib logger = logging.getLogger("gcsfs") @@ -52,15 +53,6 @@ GCS_MAX_BLOCK_SIZE = 2**28 DEFAULT_BLOCK_SIZE = 5 * 2**20 - -QUOTE_TABLE = str.maketrans( - { - "%": "%25", - "/": "%2F", - " ": "%20", - } -) - SUPPORTED_FIXED_KEY_METADATA = { "content_encoding": "contentEncoding", "cache_control": "cacheControl", @@ -70,12 +62,10 @@ } -def quote_plus(s): +def quote(s): """ - Convert some URL elements to be HTTP-safe. - - Not the same as in urllib, because, for instance, parentheses and commas - are passed through. + Quote characters to be safe for URL paths. + Also quotes '/'. Parameters ---------- @@ -85,7 +75,8 @@ ------- corrected URL """ - return s.translate(QUOTE_TABLE) + # Encode everything, including slashes + return quote_urllib(s, safe="") def norm_path(path): @@ -118,6 +109,16 @@ ) +def _chunks(lst, n): + """ + Yield evenly-sized chunks from a list. + + Implementation based on https://stackoverflow.com/a/312464. + """ + for i in range(0, len(lst), n): + yield lst[i : i + n] + + class GCSFileSystem(AsyncFileSystem): r""" Connect to Google Cloud Storage. 
@@ -358,7 +359,7 @@ path = self.base + path if args: - path = path.format(*[quote_plus(p) for p in args]) + path = path.format(*[quote(p) for p in args]) return path @retry_request(retries=retries) @@ -388,7 +389,6 @@ self, method, path, *args, json_out=False, info_out=False, **kwargs ): logger.debug(f"{method.upper()}: {path}, {args}, {kwargs.get('headers')}") - status, headers, info, contents = await self._request( method, path, *args, **kwargs ) @@ -770,7 +770,7 @@ """Get HTTP URL of the given path""" u = "{}/download/storage/v1/b/{}/o/{}?alt=media" bucket, object = self.split_path(path) - object = quote_plus(object) + object = quote(object) return u.format(self._location, bucket, object) async def _cat_file(self, path, start=None, end=None, **kwargs): @@ -919,37 +919,44 @@ "Content-Type: application/json\n" "accept: application/json\ncontent-length: 0\n" ) - body = "".join( - [ - template.format( - i=i + 1, - bucket=p.split("/", 1)[0], - key=quote_plus(p.split("/", 1)[1]), - ) - for i, p in enumerate(paths) - ] - ) - headers, content = await self._call( - "POST", - f"{self._location}/batch/storage/v1", - headers={ - "Content-Type": 'multipart/mixed; boundary="==========' - '=====7330845974216740156=="' - }, - data=body + "\n--===============7330845974216740156==--", - ) + errors = [] + # Splitting requests into 100 chunk batches + # See https://cloud.google.com/storage/docs/batch + for chunk in _chunks(paths, 100): + body = "".join( + [ + template.format( + i=i + 1, + bucket=p.split("/", 1)[0], + key=quote(p.split("/", 1)[1]), + ) + for i, p in enumerate(chunk) + ] + ) + headers, content = await self._call( + "POST", + f"{self._location}/batch/storage/v1", + headers={ + "Content-Type": 'multipart/mixed; boundary="==========' + '=====7330845974216740156=="' + }, + data=body + "\n--===============7330845974216740156==--", + ) + + boundary = headers["Content-Type"].split("=", 1)[1] + parents = [self._parent(p) for p in paths] + [self.invalidate_cache(parent) for 
parent in parents + list(paths)] + txt = content.decode() + if any( + not ("200 OK" in c or "204 No Content" in c) + for c in txt.split(boundary)[1:-1] + ): + pattern = '"message": "([^"]+)"' + out = set(re.findall(pattern, txt)) + errors.extend(out) - boundary = headers["Content-Type"].split("=", 1)[1] - parents = [self._parent(p) for p in paths] - [self.invalidate_cache(parent) for parent in parents + list(paths)] - txt = content.decode() - if any( - not ("200 OK" in c or "204 No Content" in c) - for c in txt.split(boundary)[1:-1] - ): - pattern = '"message": "([^"]+)"' - out = set(re.findall(pattern, txt)) - raise OSError(out) + if errors: + raise OSError(errors) @property def on_google(self): @@ -1104,7 +1111,7 @@ while parent: dir_key = self.split_path(parent)[1] - if not dir_key: + if not dir_key or len(parent) < len(path): break dirs[parent] = { @@ -1116,10 +1123,6 @@ "size": 0, } - if len(parent) < len(path): - # don't go above the requested level - break - cache_entries.setdefault(parent, []).append(previous) previous = dirs[parent] @@ -1462,7 +1465,7 @@ uid = re.findall("upload_id=([^&=?]+)", self.location) self.gcsfs.call( "DELETE", - f"{self.fs._location}/upload/storage/v1/b/{quote_plus(self.bucket)}/o", + f"{self.fs._location}/upload/storage/v1/b/{quote(self.bucket)}/o", params={"uploadType": "resumable", "upload_id": uid}, json_out=True, ) @@ -1558,7 +1561,7 @@ j.update(_convert_fixed_key_metadata(fixed_key_metadata)) headers, _ = await fs._call( method="POST", - path=f"{fs._location}/upload/storage/v1/b/{quote_plus(bucket)}/o", + path=f"{fs._location}/upload/storage/v1/b/{quote(bucket)}/o", uploadType="resumable", json=j, headers={"X-Upload-Content-Type": content_type}, @@ -1581,7 +1584,7 @@ fixed_key_metadata=None, ): checker = get_consistency_checker(consistency) - path = f"{fs._location}/upload/storage/v1/b/{quote_plus(bucket)}/o" + path = f"{fs._location}/upload/storage/v1/b/{quote(bucket)}/o" metadata = {"name": key} if metadatain is not None: 
metadata["metadata"] = metadatain diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/gcsfs/credentials.py new/gcsfs-2022.10.0/gcsfs/credentials.py --- old/gcsfs-2022.8.2/gcsfs/credentials.py 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/gcsfs/credentials.py 2022-10-19 18:54:45.000000000 +0200 @@ -92,9 +92,12 @@ def _connect_cloud(self): self.credentials = gauth.compute_engine.Credentials() - - if not self.credentials.valid: - raise ValueError("Invalid gcloud credentials") + try: + with requests.Session() as session: + req = Request(session) + self.credentials.refresh(req) + except gauth.exceptions.RefreshError as error: + raise ValueError("Invalid gcloud credentials") from error def _connect_cache(self): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/gcsfs/retry.py new/gcsfs-2022.10.0/gcsfs/retry.py --- old/gcsfs-2022.8.2/gcsfs/retry.py 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/gcsfs/retry.py 2022-10-19 18:54:45.000000000 +0200 @@ -77,9 +77,9 @@ """ if status >= 400: if args: - from .core import quote_plus + from .core import quote - path = path.format(*[quote_plus(p) for p in args]) + path = path.format(*[quote(p) for p in args]) if status == 404: raise FileNotFoundError(path) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/gcsfs/tests/test_core.py new/gcsfs-2022.10.0/gcsfs/tests/test_core.py --- old/gcsfs-2022.8.2/gcsfs/tests/test_core.py 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/gcsfs/tests/test_core.py 2022-10-19 18:54:45.000000000 +0200 @@ -23,7 +23,7 @@ text_files, ) from gcsfs.tests.utils import tempdir, tmpfile -from gcsfs.core import GCSFileSystem, quote_plus +from gcsfs.core import GCSFileSystem, quote from gcsfs.credentials import GoogleCredentials import gcsfs.checkers from gcsfs import __version__ as version @@ -204,6 +204,22 @@ 
assert not gcs.exists(TEST_BUCKET + files[-1]) +def test_rm_chunked_batch(gcs): + files = [f"{TEST_BUCKET}/t{i}" for i in range(303)] + for fn in files: + gcs.touch(fn) + + files_created = gcs.find(TEST_BUCKET) + for fn in files: + assert fn in files_created + + gcs.rm(files) + + files_removed = gcs.find(TEST_BUCKET) + for fn in files: + assert fn not in files_removed + + def test_file_access(gcs): fn = TEST_BUCKET + "/nested/file1" data = b"hello\n" @@ -290,7 +306,7 @@ fn = TEST_BUCKET + "/nested/file1" url = gcs.url(fn) assert "http" in url - assert quote_plus("nested/file1") in url + assert quote("nested/file1") in url with gcs.open(fn) as f: assert "http" in f.url() @@ -460,6 +476,36 @@ assert gcs.cat(protocol + TEST_BUCKET + "/temp_dir/accounts.1.json") == data1 +def test_special_characters_filename(gcs: GCSFileSystem): + special_filename = """'!"`#$%&'()+,-.<=>?@[]^_{}~/'""" + full_path = TEST_BUCKET + "/" + special_filename + gcs.touch(full_path) + info = gcs.info(full_path) + assert info["name"] == full_path + # Normal cat currently doesn't work with special characters, + # because it invokes expand_path (and in turn glob) without escaping the characters. + # This would need to be fixed in fsspec. 
+ assert gcs.cat_file(full_path) == b"" + + +def test_slash_filename(gcs: GCSFileSystem): + slash_filename = """abc/def""" + full_path = TEST_BUCKET + "/" + slash_filename + gcs.touch(full_path) + info = gcs.info(full_path) + assert info["name"] == full_path + assert gcs.cat_file(full_path) == b"" + + +def test_hash_filename(gcs: GCSFileSystem): + slash_filename = """a#b#c""" + full_path = TEST_BUCKET + "/" + slash_filename + gcs.touch(full_path) + info = gcs.info(full_path) + assert info["name"] == full_path + assert gcs.cat_file(full_path) == b"" + + def test_errors(gcs): with pytest.raises((IOError, OSError)): gcs.open(TEST_BUCKET + "/tmp/test/shfoshf", "rb") @@ -1102,3 +1148,12 @@ gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=True) gcs.rm(f"{TEST_BUCKET + 'new'}", recursive=True) + + +def test_deep_find_wthdirs(gcs): + gcs.touch(f"{TEST_BUCKET}/deep/nested/dir") + assert gcs.find(f"{TEST_BUCKET}/deep/nested") == [f"{TEST_BUCKET}/deep/nested/dir"] + assert gcs.find(f"{TEST_BUCKET}/deep/nested", withdirs=True) == [ + f"{TEST_BUCKET}/deep/nested", + f"{TEST_BUCKET}/deep/nested/dir", + ] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/gcsfs-2022.8.2/requirements.txt new/gcsfs-2022.10.0/requirements.txt --- old/gcsfs-2022.8.2/requirements.txt 2022-09-01 03:12:31.000000000 +0200 +++ new/gcsfs-2022.10.0/requirements.txt 2022-10-19 18:54:45.000000000 +0200 @@ -3,5 +3,5 @@ google-cloud-storage requests decorator>4.1.2 -fsspec==2022.8.2 +fsspec==2022.10.0 aiohttp!=4.0.0a0, !=4.0.0a1