This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new d775eed527 [python] Fix File Source type in data file meta (#6571)
d775eed527 is described below
commit d775eed527d7bbbd2bad99d11af08bf55f483bd4
Author: umi <[email protected]>
AuthorDate: Mon Nov 10 11:20:32 2025 +0800
[python] Fix File Source type in data file meta (#6571)
---
.../pypaimon/manifest/schema/data_file_meta.py | 4 +--
paimon-python/pypaimon/tests/manifest/__init__.py | 17 +++++++++++
.../tests/{ => manifest}/manifest_schema_test.py | 35 +++++++---------------
paimon-python/pypaimon/write/file_store_commit.py | 2 +-
.../pypaimon/write/writer/data_blob_writer.py | 2 +-
paimon-python/pypaimon/write/writer/data_writer.py | 2 +-
6 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/paimon-python/pypaimon/manifest/schema/data_file_meta.py b/paimon-python/pypaimon/manifest/schema/data_file_meta.py
index a414644e27..405c2e3483 100644
--- a/paimon-python/pypaimon/manifest/schema/data_file_meta.py
+++ b/paimon-python/pypaimon/manifest/schema/data_file_meta.py
@@ -44,7 +44,7 @@ class DataFileMeta:
creation_time: Optional[datetime] = None
delete_row_count: Optional[int] = None
embedded_index: Optional[bytes] = None
- file_source: Optional[str] = None
+ file_source: Optional[int] = None
value_stats_cols: Optional[List[str]] = None
external_path: Optional[str] = None
first_row_id: Optional[int] = None
@@ -163,7 +163,7 @@ DATA_FILE_META_SCHEMA = {
"default": None},
{"name": "_DELETE_ROW_COUNT", "type": ["null", "long"], "default": None},
{"name": "_EMBEDDED_FILE_INDEX", "type": ["null", "bytes"], "default": None},
- {"name": "_FILE_SOURCE", "type": ["null", "string"], "default": None},
+ {"name": "_FILE_SOURCE", "type": ["null", "int"], "default": None},
{"name": "_VALUE_STATS_COLS",
"type": ["null", {"type": "array", "items": "string"}],
"default": None},
diff --git a/paimon-python/pypaimon/tests/manifest/__init__.py b/paimon-python/pypaimon/tests/manifest/__init__.py
new file mode 100644
index 0000000000..53ed4d36c2
--- /dev/null
+++ b/paimon-python/pypaimon/tests/manifest/__init__.py
@@ -0,0 +1,17 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
diff --git a/paimon-python/pypaimon/tests/manifest_schema_test.py b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
similarity index 87%
rename from paimon-python/pypaimon/tests/manifest_schema_test.py
rename to paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
index 3d82ededa6..b4b60ffd43 100644
--- a/paimon-python/pypaimon/tests/manifest_schema_test.py
+++ b/paimon-python/pypaimon/tests/manifest/manifest_schema_test.py
@@ -1,23 +1,7 @@
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
import unittest
+from pypaimon.manifest.schema import data_file_meta
+
from pypaimon.manifest.schema.data_file_meta import DATA_FILE_META_SCHEMA
from pypaimon.manifest.schema.manifest_file_meta import MANIFEST_FILE_META_SCHEMA
from pypaimon.manifest.schema.simple_stats import (
@@ -28,7 +12,14 @@ from pypaimon.manifest.schema.simple_stats import (
class ManifestSchemaTest(unittest.TestCase):
- """Test cases for the manifest schema definitions."""
+ def test_file_source_field_type_and_default(self):
+ schema = data_file_meta.DATA_FILE_META_SCHEMA
+ fields = schema.get("fields", [])
+ file_source_field = next((f for f in fields if f.get("name") == "_FILE_SOURCE"), None)
+
+ self.assertIsNotNone(file_source_field, "_FILE_SOURCE field not found in DATA_FILE_META_SCHEMA")
+ self.assertEqual(file_source_field.get("type"), ["null", "int"])
+ self.assertIsNone(file_source_field.get("default"))
def test_data_file_meta_schema_structure(self):
"""Test that DATA_FILE_META_SCHEMA has the correct structure."""
@@ -73,7 +64,7 @@ class ManifestSchemaTest(unittest.TestCase):
["null", {"type": "long", "logicalType":
"timestamp-millis"}])
self.assertEqual(field_map["_DELETE_ROW_COUNT"]["type"], ["null",
"long"])
self.assertEqual(field_map["_EMBEDDED_FILE_INDEX"]["type"], ["null",
"bytes"])
- self.assertEqual(field_map["_FILE_SOURCE"]["type"], ["null", "string"])
+ self.assertEqual(field_map["_FILE_SOURCE"]["type"], ["null", "int"])
self.assertEqual(field_map["_VALUE_STATS_COLS"]["type"], ["null",
{"type": "array", "items": "string"}])
self.assertEqual(field_map["_EXTERNAL_PATH"]["type"], ["null",
"string"])
self.assertEqual(field_map["_FIRST_ROW_ID"]["type"], ["null", "long"])
@@ -141,7 +132,3 @@ class ManifestSchemaTest(unittest.TestCase):
PARTITION_STATS_SCHEMA["name"]
]
self.assertEqual(len(names), len(set(names)), "Schema names should be unique")
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/paimon-python/pypaimon/write/file_store_commit.py b/paimon-python/pypaimon/write/file_store_commit.py
index afeba52b3c..224e4a82c9 100644
--- a/paimon-python/pypaimon/write/file_store_commit.py
+++ b/paimon-python/pypaimon/write/file_store_commit.py
@@ -361,7 +361,7 @@ class FileStoreCommit:
for entry in commit_entries:
# Check if this is an append file that needs row ID assignment
if (entry.kind == 0 and # ADD kind
- entry.file.file_source == "APPEND" and # APPEND file source
+ entry.file.file_source == 0 and # APPEND file source
entry.file.first_row_id is None): # No existing first_row_id
if self._is_blob_file(entry.file.file_name):
diff --git a/paimon-python/pypaimon/write/writer/data_blob_writer.py b/paimon-python/pypaimon/write/writer/data_blob_writer.py
index b711d2e695..9d2e0982a4 100644
--- a/paimon-python/pypaimon/write/writer/data_blob_writer.py
+++ b/paimon-python/pypaimon/write/writer/data_blob_writer.py
@@ -301,7 +301,7 @@ class DataBlobWriter(DataWriter):
extra_files=[],
creation_time=datetime.now(),
delete_row_count=0,
- file_source="APPEND",
+ file_source=0,
value_stats_cols=self.normal_column_names,
file_path=str(file_path),
write_cols=self.write_cols)
diff --git a/paimon-python/pypaimon/write/writer/data_writer.py b/paimon-python/pypaimon/write/writer/data_writer.py
index 24e3b0ca48..351ff32979 100644
--- a/paimon-python/pypaimon/write/writer/data_writer.py
+++ b/paimon-python/pypaimon/write/writer/data_writer.py
@@ -210,7 +210,7 @@ class DataWriter(ABC):
extra_files=[],
creation_time=datetime.now(),
delete_row_count=0,
- file_source="APPEND",
+ file_source=0,
value_stats_cols=None, # None means all columns in the data have statistics
external_path=None,
first_row_id=None,