This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new d28705d371 [python][daft] Fix double-scheme prefix (#7958)
d28705d371 is described below
commit d28705d371a3a5492fc70b7bd6b1d1a212abf41a
Author: XiaoHongbo <[email protected]>
AuthorDate: Tue May 26 09:54:05 2026 +0800
[python][daft] Fix double-scheme prefix (#7958)
### Purpose
When running Daft's read_paimon(...) against a Paimon table managed by a
Paimon REST catalog backed by OSS, the read crashes immediately with:
daft.exceptions.DaftCoreException: DaftError::External Unable to convert
URL
"file://oss://my_bucket/my_db.db/my_tablebucket-0/data-0d475e4b-c35b-4afa-a5d9-a467cc008462-0.parquet"
to path
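Note the double scheme, file://oss://..., which Daft's URL parser cannot
interpret. This PR fixes the issue: _build_file_uri now returns file_path
unchanged when it already carries a URL scheme, instead of prepending the
warehouse scheme or file://.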
### Tests
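`daft_datasource_test` (new): covers pass-through of paths that already
have a scheme, prefixing unschemed paths with the warehouse scheme, and
defaulting to file:// when neither the warehouse nor the path has one.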
---
paimon-python/pypaimon/daft/daft_datasource.py | 2 +
.../pypaimon/tests/daft/daft_datasource_test.py | 67 ++++++++++++++++++++++
2 files changed, 69 insertions(+)
diff --git a/paimon-python/pypaimon/daft/daft_datasource.py b/paimon-python/pypaimon/daft/daft_datasource.py
index f6fb6f8f4c..ce063bf109 100644
--- a/paimon-python/pypaimon/daft/daft_datasource.py
+++ b/paimon-python/pypaimon/daft/daft_datasource.py
@@ -295,6 +295,8 @@ class PaimonDataSource(DataSource):
def _build_file_uri(self, file_path: str) -> str:
"""Reconstruct a full URI from a (potentially scheme-stripped)
file_path."""
+ if urlparse(file_path).scheme:
+ return file_path
if self._warehouse_scheme:
return f"{self._warehouse_scheme}://{file_path}"
return f"file://{file_path}"
diff --git a/paimon-python/pypaimon/tests/daft/daft_datasource_test.py b/paimon-python/pypaimon/tests/daft/daft_datasource_test.py
new file mode 100644
index 0000000000..1ad6e7024b
--- /dev/null
+++ b/paimon-python/pypaimon/tests/daft/daft_datasource_test.py
@@ -0,0 +1,67 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+import unittest
+
+import pytest
+
+pypaimon = pytest.importorskip("pypaimon")
+daft = pytest.importorskip("daft")
+
+from pypaimon.daft.daft_datasource import PaimonDataSource
+
+
+def _build_uri(warehouse_scheme: str, file_path: str) -> str:
+ class _Stub:
+ pass
+ stub = _Stub()
+ stub._warehouse_scheme = warehouse_scheme
+ return PaimonDataSource._build_file_uri(stub, file_path)
+
+
+class BuildFileUriTest(unittest.TestCase):
+
+ def test_passes_through_when_path_already_has_scheme(self):
+ cases = [
+ ("", "oss://bucket/db.db/tbl/data.parquet"),
+ ("", "s3://bucket/key.parquet"),
+ ("", "s3a://bucket/key.parquet"),
+ ("", "s3n://bucket/key.parquet"),
+ ("", "hdfs://nameservice/path/data.parquet"),
+ ("file", "file:///abs/path/data.parquet"),
+ ("oss", "oss://bucket/db.db/tbl/data.parquet"),
+ ("", "oss://clg-paimon-fe4767/db.db/tbl/bucket-0/data-0.parquet"),
+ ]
+ for warehouse_scheme, file_path in cases:
+ with self.subTest(warehouse_scheme=warehouse_scheme, file_path=file_path):
+ self.assertEqual(_build_uri(warehouse_scheme, file_path), file_path)
+
+ def test_adds_warehouse_scheme_when_path_unschemed(self):
+ self.assertEqual(
+ _build_uri("oss", "bucket/db.db/tbl/data.parquet"),
+ "oss://bucket/db.db/tbl/data.parquet",
+ )
+
+ def test_defaults_to_file_scheme_when_both_unschemed(self):
+ self.assertEqual(
+ _build_uri("", "/tmp/pytest-xxx/db.db/tbl/data.parquet"),
+ "file:///tmp/pytest-xxx/db.db/tbl/data.parquet",
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()