bolkedebruin commented on code in PR #35612: URL: https://github.com/apache/airflow/pull/35612#discussion_r1393804189
########## airflow/io/path.py: ########## @@ -0,0 +1,396 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import contextlib +import functools +import os +import shutil +import typing +from pathlib import PurePath +from urllib.parse import SplitResult, urlsplit + +from fsspec.core import split_protocol +from fsspec.utils import stringify_path +from upath.implementations.cloud import CloudPath, _CloudAccessor +from upath.registry import get_upath_class + +from airflow.io.store import ObjectStore, attach +from airflow.io.utils.stat import stat_result + +if typing.TYPE_CHECKING: + from fsspec import AbstractFileSystem + + +PT = typing.TypeVar("PT", bound="ObjectStoragePath") + +default = "file" + + +class _AirflowCloudAccessor(_CloudAccessor): + _store: ObjectStore + _conn_id: str | None + + __slots__ = ("_store", "_conn_id") + + def __init__(self, parsed_url: SplitResult | None, **kwargs: typing.Any) -> None: + _store = kwargs.pop("store", None) + conn_id = kwargs.pop("conn_id", None) + if _store: + self._store = _store + elif parsed_url and parsed_url.scheme: + self._store = attach(parsed_url.scheme, conn_id) # todo add kwargs as storage_options + else: + self._store = attach(default, conn_id) 
+ self._conn_id = conn_id + + @property + def _fs(self) -> AbstractFileSystem: + return self._store.fs + + def __eq__(self, other): + return isinstance(other, _AirflowCloudAccessor) and self._store == other._store + + +class ObjectStoragePath(CloudPath): Review Comment: You can do operations on local files, http (with an `attach`), dbfs (with an `attach`). It just means that such a Path will exhibit the semantics of object storage. This was also the case with the previous implementation. We could extend the implementation with a registry - as `upath` does - to support different semantics. However, due to the nature of `pathlib.Path` and therefore `upath`, it just means repeating quite a lot of code and not being able to fully rely upon upstream implementations. In that way `ObjectStoragePath` is more true to its behavior than `StoragePath` would be. An alternative could be `OPath`, but that is also a bit weird I think. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
