This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new d28705d371 [python][daft] Fix double-scheme prefix (#7958)
d28705d371 is described below

commit d28705d371a3a5492fc70b7bd6b1d1a212abf41a
Author: XiaoHongbo <[email protected]>
AuthorDate: Tue May 26 09:54:05 2026 +0800

    [python][daft] Fix double-scheme prefix (#7958)
    
    ### Purpose
    When running a Daft read_paimon(...) against a Paimon table managed by
    a Paimon REST catalog backed by OSS, the read crashes immediately:
    
    daft.exceptions.DaftCoreException: DaftError::External Unable to convert URL
    "file://oss://my_bucket/my_db.db/my_tablebucket-0/data-0d475e4b-c35b-4afa-a5d9-a467cc008462-0.parquet"
    to path
    
    Note the file://oss://... prefix: the path carries two schemes, which
    Daft's URL parser cannot resolve. This PR fixes the issue by returning
    file_path unchanged from _build_file_uri when it already has a scheme.
    
    ### Tests
    `daft_datasource_test`
---
 paimon-python/pypaimon/daft/daft_datasource.py     |  2 +
 .../pypaimon/tests/daft/daft_datasource_test.py    | 67 ++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/paimon-python/pypaimon/daft/daft_datasource.py b/paimon-python/pypaimon/daft/daft_datasource.py
index f6fb6f8f4c..ce063bf109 100644
--- a/paimon-python/pypaimon/daft/daft_datasource.py
+++ b/paimon-python/pypaimon/daft/daft_datasource.py
@@ -295,6 +295,8 @@ class PaimonDataSource(DataSource):
 
     def _build_file_uri(self, file_path: str) -> str:
         """Reconstruct a full URI from a (potentially scheme-stripped) file_path."""
+        if urlparse(file_path).scheme:
+            return file_path
         if self._warehouse_scheme:
             return f"{self._warehouse_scheme}://{file_path}"
         return f"file://{file_path}"
diff --git a/paimon-python/pypaimon/tests/daft/daft_datasource_test.py b/paimon-python/pypaimon/tests/daft/daft_datasource_test.py
new file mode 100644
index 0000000000..1ad6e7024b
--- /dev/null
+++ b/paimon-python/pypaimon/tests/daft/daft_datasource_test.py
@@ -0,0 +1,67 @@
+################################################################################
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+import unittest
+
+import pytest
+
+pypaimon = pytest.importorskip("pypaimon")
+daft = pytest.importorskip("daft")
+
+from pypaimon.daft.daft_datasource import PaimonDataSource
+
+
+def _build_uri(warehouse_scheme: str, file_path: str) -> str:
+    class _Stub:
+        pass
+    stub = _Stub()
+    stub._warehouse_scheme = warehouse_scheme
+    return PaimonDataSource._build_file_uri(stub, file_path)
+
+
+class BuildFileUriTest(unittest.TestCase):
+
+    def test_passes_through_when_path_already_has_scheme(self):
+        cases = [
+            ("",     "oss://bucket/db.db/tbl/data.parquet"),
+            ("",     "s3://bucket/key.parquet"),
+            ("",     "s3a://bucket/key.parquet"),
+            ("",     "s3n://bucket/key.parquet"),
+            ("",     "hdfs://nameservice/path/data.parquet"),
+            ("file", "file:///abs/path/data.parquet"),
+            ("oss",  "oss://bucket/db.db/tbl/data.parquet"),
+            ("",     "oss://clg-paimon-fe4767/db.db/tbl/bucket-0/data-0.parquet"),
+        ]
+        for warehouse_scheme, file_path in cases:
+            with self.subTest(warehouse_scheme=warehouse_scheme, file_path=file_path):
+                self.assertEqual(_build_uri(warehouse_scheme, file_path), file_path)
+
+    def test_adds_warehouse_scheme_when_path_unschemed(self):
+        self.assertEqual(
+            _build_uri("oss", "bucket/db.db/tbl/data.parquet"),
+            "oss://bucket/db.db/tbl/data.parquet",
+        )
+
+    def test_defaults_to_file_scheme_when_both_unschemed(self):
+        self.assertEqual(
+            _build_uri("", "/tmp/pytest-xxx/db.db/tbl/data.parquet"),
+            "file:///tmp/pytest-xxx/db.db/tbl/data.parquet",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()

Reply via email to