shyjsarah commented on code in PR #7902:
URL: https://github.com/apache/paimon/pull/7902#discussion_r3272445994


##########
paimon-python/pypaimon/filesystem/jindo_file_system_handler.py:
##########
@@ -24,17 +24,83 @@
 
 try:
     import pyjindo.fs as jfs
+    import pyjindo.ossfs as jossfs
     import pyjindo.util as jutil
     JINDO_AVAILABLE = True
 except ImportError:
     JINDO_AVAILABLE = False
     jfs = None
+    jossfs = None
     jutil = None
 
 from pypaimon.common.options import Options
 from pypaimon.common.options.config import OssOptions
 
 
+def build_jindo_config(catalog_options: Options):
+    """Build a pyjindo ``Config`` from OSS catalog options.
+
+    Shared by ``JindoFileSystemHandler`` (the PyArrow FileIO path) and
+    ``create_jindo_oss_filesystem`` (the PVFS fsspec path) so both jindo entry
+    points consume exactly the same credential / endpoint options.
+    """
+    if not JINDO_AVAILABLE:
+        raise ImportError("Module pyjindo is not available. Please install pyjindosdk.")
+
+    config = jutil.Config()
+
+    access_key_id = catalog_options.get(OssOptions.OSS_ACCESS_KEY_ID)
+    access_key_secret = catalog_options.get(OssOptions.OSS_ACCESS_KEY_SECRET)
+    security_token = catalog_options.get(OssOptions.OSS_SECURITY_TOKEN)
+    endpoint = catalog_options.get(OssOptions.OSS_ENDPOINT)
+    region = catalog_options.get(OssOptions.OSS_REGION)
+
+    if access_key_id:
+        config.set("fs.oss.accessKeyId", access_key_id)
+    if access_key_secret:
+        config.set("fs.oss.accessKeySecret", access_key_secret)
+    if security_token:
+        config.set("fs.oss.securityToken", security_token)
+    if endpoint:
+        endpoint_clean = endpoint.replace('http://', '').replace('https://', '')
+        config.set("fs.oss.endpoint", endpoint_clean)
+    if region:
+        config.set("fs.oss.region", region)
+    config.set("fs.oss.user.agent.features", "pypaimon")
+    return config
+
+
+def create_jindo_oss_filesystem(root_uri: str, catalog_options: Options):
+    """Create an fsspec-compatible ``JindoOssFileSystem`` for an OSS bucket.
+
+    ``PaimonVirtualFileSystem`` uses this to back OSS reads/writes with the
+    native JindoSDK instead of ``ossfs``. JindoSDK writes objects via
+    PutObject / multipart upload, so it never issues OSS ``AppendObject`` --
+    the call that fails with ``PositionNotEqualToLength`` (409) on the OSS
+    data-acceleration endpoint when ``ossfs`` flushes a multi-chunk write.
+
+    ``root_uri`` is the bucket root, e.g. ``oss://my-bucket/``; it must carry
+    the bucket so ``JindoOssFileSystem`` can re-attach the ``oss://`` scheme to
+    the bucket-relative paths that ``PaimonVirtualFileSystem`` passes in.
+    """
+    if not JINDO_AVAILABLE:
+        raise ImportError("Module pyjindo is not available. Please install pyjindosdk.")
+
+    return jossfs.JindoOssFileSystem(
+        uri=root_uri,
+        config=build_jindo_config(catalog_options),
+        # PaimonVirtualFileSystem owns directory semantics for the virtual FS;
+        # the backing object-store fs must not auto-create dir-marker objects.
+        auto_mkdir=False,
+        # Bypass fsspec's _Cached metaclass instance cache, so the only
+        # reference to this filesystem -- and to its underlying native jindo
+        # connection -- is the PaimonRealStorage cache in PVFS. On token
+        # refresh PVFS replaces that entry and the native resources can be
+        # released, instead of being pinned forever by fsspec's global cache.
+        skip_instance_cache=True,

Review Comment:
   Thanks — declared the minimum explicitly in e989d453.
   
     Added a new jindo extra to setup.py:
   
     'jindo': [
         'pyjindosdk>=6.10.4',
     ],
   
     so pip install pypaimon[jindo] pulls the right version.
   
     Notes on the choice of 6.10.4 and the ctor:
   
     - PyPI currently ships only pyjindosdk==6.10.4 (https://pypi.org/simple/pyjindosdk/), so there is no older PyPI release a user could resolve to.
     - On 6.10.4, the signature of `pyjindo.ossfs.JindoOssFileSystem.__init__` is `(self, uri=None, config=None, auto_mkdir=True, **kwargs)`. `skip_instance_cache` flows through `**kwargs`, so the call site stays forward-compatible if a future 6.x adds further fsspec kwargs.
     - The two-flag split above plus the precise ImportError("pyjindo.ossfs is not available. Please install pyjindosdk>=6.10.4.") give a clean signal when a user is on an older internal build that lacks pyjindo.ossfs, instead of an opaque attribute error later.
   
     Verified end-to-end on a fresh `pip install pyjindosdk==6.10.4`: PVFS `_use_jindo_oss_backend()` returns True, an 8 MB multipart write through JindoOssFile succeeds, and the token-refresh close path runs after the write lock is released.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to