XiaoHongbo-Hope commented on code in PR #7040: URL: https://github.com/apache/paimon/pull/7040#discussion_r2693252641
########## paimon-python/pypaimon/filesystem/pyarrow_file_io.py: ########## @@ -0,0 +1,512 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ +import logging +import os +import subprocess +import uuid +from pathlib import Path +from typing import Any, Dict, List, Optional +from urllib.parse import splitport, urlparse + +import pyarrow +from packaging.version import parse +from pyarrow._fs import FileSystem + +from pypaimon.common.file_io import FileIO +from pypaimon.common.options import Options +from pypaimon.common.options.config import OssOptions, S3Options +from pypaimon.common.uri_reader import UriReaderFactory +from pypaimon.filesystem.local import PaimonLocalFileSystem +from pypaimon.schema.data_types import DataField, AtomicType, PyarrowFieldParser +from pypaimon.table.row.blob import BlobData, BlobDescriptor, Blob +from pypaimon.table.row.generic_row import GenericRow +from pypaimon.table.row.row_kind import RowKind +from pypaimon.write.blob_format_writer import BlobFormatWriter + + +class PyArrowFileIO(FileIO): + def __init__(self, path: str, catalog_options: 
Options): + self.properties = catalog_options + self.logger = logging.getLogger(__name__) + scheme, netloc, _ = self.parse_location(path) + self.uri_reader_factory = UriReaderFactory(catalog_options) + if scheme in {"oss"}: + self.filesystem = self._initialize_oss_fs(path) + elif scheme in {"s3", "s3a", "s3n"}: + self.filesystem = self._initialize_s3_fs() + elif scheme in {"hdfs", "viewfs"}: + self.filesystem = self._initialize_hdfs_fs(scheme, netloc) + elif scheme in {"file"}: Review Comment: > Maybe local file system is not needed here. Removed. ########## paimon-python/pypaimon/filesystem/local_file_io.py: ########## @@ -0,0 +1,444 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ +import logging +import os +import shutil +import threading +import uuid +from pathlib import Path +from typing import Any, Dict, Optional +from urllib.parse import urlparse + +import pyarrow +import pyarrow.fs + +from pypaimon.common.file_io import FileIO +from pypaimon.common.options import Options +from pypaimon.common.uri_reader import UriReaderFactory +from pypaimon.filesystem.local import PaimonLocalFileSystem +from pypaimon.schema.data_types import DataField, AtomicType, PyarrowFieldParser +from pypaimon.table.row.blob import BlobData, BlobDescriptor, Blob +from pypaimon.table.row.generic_row import GenericRow +from pypaimon.table.row.row_kind import RowKind +from pypaimon.write.blob_format_writer import BlobFormatWriter + + +class LocalFileIO(FileIO): + """ + Local file system implementation of FileIO. + """ + + RENAME_LOCK = threading.Lock() + + INSTANCE = None Review Comment: > Do we need INSTANCE variable? Removed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
