This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new e49d964128 [python] Rename Blob Stored to Blob Descriptor Field
e49d964128 is described below
commit e49d964128f0479ad195cb765339e6237e77a5f4
Author: JingsongLi <[email protected]>
AuthorDate: Thu Feb 26 22:19:10 2026 +0800
[python] Rename Blob Stored to Blob Descriptor Field
---
paimon-python/pypaimon/common/options/core_options.py | 2 +-
paimon-python/pypaimon/read/reader/data_file_batch_reader.py | 6 +++---
paimon-python/pypaimon/read/split_read.py | 6 +++---
paimon-python/pypaimon/read/table_read.py | 2 +-
paimon-python/pypaimon/tests/blob_table_test.py | 4 ++--
paimon-python/pypaimon/write/writer/data_blob_writer.py | 12 ++++++------
6 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/paimon-python/pypaimon/common/options/core_options.py b/paimon-python/pypaimon/common/options/core_options.py
index 5aee8ad83e..103956d60e 100644
--- a/paimon-python/pypaimon/common/options/core_options.py
+++ b/paimon-python/pypaimon/common/options/core_options.py
@@ -494,7 +494,7 @@ class CoreOptions:
def blob_as_descriptor(self, default=None):
return self.options.get(CoreOptions.BLOB_AS_DESCRIPTOR, default)
- def blob_stored_descriptor_fields(self, default=None):
+ def blob_descriptor_fields(self, default=None):
value = self.options.get(CoreOptions.BLOB_DESCRIPTOR_FIELD, default)
if value is None:
return set()
diff --git a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py
index 8ab9229e2a..7f2e1c61e1 100644
--- a/paimon-python/pypaimon/read/reader/data_file_batch_reader.py
+++ b/paimon-python/pypaimon/read/reader/data_file_batch_reader.py
@@ -42,7 +42,7 @@ class DataFileBatchReader(RecordBatchReader):
row_tracking_enabled: bool,
system_fields: dict,
blob_as_descriptor: bool = False,
- blob_stored_descriptor_fields: Optional[set] = None,
+ blob_descriptor_fields: Optional[set] = None,
file_io: Optional[FileIO] = None):
self.format_reader = format_reader
self.index_mapping = index_mapping
@@ -54,7 +54,7 @@ class DataFileBatchReader(RecordBatchReader):
self.max_sequence_number = max_sequence_number
self.system_fields = system_fields
self.blob_as_descriptor = blob_as_descriptor
- self.blob_stored_descriptor_fields = blob_stored_descriptor_fields or set()
+ self.blob_descriptor_fields = blob_descriptor_fields or set()
self.file_io = file_io
self.blob_field_names = {
field.name
@@ -63,7 +63,7 @@ class DataFileBatchReader(RecordBatchReader):
}
self.descriptor_blob_fields = {
field_name
- for field_name in self.blob_stored_descriptor_fields
+ for field_name in self.blob_descriptor_fields
if field_name in self.blob_field_names
}
diff --git a/paimon-python/pypaimon/read/split_read.py b/paimon-python/pypaimon/read/split_read.py
index a09d69e9f7..d76a71682b 100644
--- a/paimon-python/pypaimon/read/split_read.py
+++ b/paimon-python/pypaimon/read/split_read.py
@@ -143,7 +143,7 @@ class SplitRead(ABC):
raise ValueError(f"Unexpected file format: {file_format}")
blob_as_descriptor = CoreOptions.blob_as_descriptor(self.table.options)
- blob_stored_descriptor_fields = CoreOptions.blob_stored_descriptor_fields(self.table.options)
+ blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options)
index_mapping = self.create_index_mapping()
partition_info = self._create_partition_info()
@@ -164,7 +164,7 @@ class SplitRead(ABC):
row_tracking_enabled,
system_fields,
blob_as_descriptor=blob_as_descriptor,
- blob_stored_descriptor_fields=blob_stored_descriptor_fields,
+ blob_descriptor_fields=blob_descriptor_fields,
file_io=self.table.file_io)
else:
return DataFileBatchReader(
@@ -178,7 +178,7 @@ class SplitRead(ABC):
row_tracking_enabled,
system_fields,
blob_as_descriptor=blob_as_descriptor,
- blob_stored_descriptor_fields=blob_stored_descriptor_fields,
+ blob_descriptor_fields=blob_descriptor_fields,
file_io=self.table.file_io)
def _get_fields_and_predicate(self, schema_id: int, read_fields):
diff --git a/paimon-python/pypaimon/read/table_read.py b/paimon-python/pypaimon/read/table_read.py
index 10c5effde5..142444dcd4 100644
--- a/paimon-python/pypaimon/read/table_read.py
+++ b/paimon-python/pypaimon/read/table_read.py
@@ -95,7 +95,7 @@ class TableRead:
if CoreOptions.blob_as_descriptor(self.table.options):
return table
- descriptor_fields = CoreOptions.blob_stored_descriptor_fields(self.table.options)
+ descriptor_fields = CoreOptions.blob_descriptor_fields(self.table.options)
if not descriptor_fields:
return table
diff --git a/paimon-python/pypaimon/tests/blob_table_test.py b/paimon-python/pypaimon/tests/blob_table_test.py
index fe7d1252e9..f49f365f89 100755
--- a/paimon-python/pypaimon/tests/blob_table_test.py
+++ b/paimon-python/pypaimon/tests/blob_table_test.py
@@ -1084,7 +1084,7 @@ class DataBlobWriterTest(unittest.TestCase):
print(" - Verified blob data can be read from descriptor")
print(" - Tested blob-as-descriptor=true read output mode")
- def test_blob_stored_descriptor_fields_mixed_mode(self):
+ def test_blob_descriptor_fields_mixed_mode(self):
import random
from pypaimon import Schema
from pypaimon.table.row.blob import BlobDescriptor
@@ -1138,7 +1138,7 @@ class DataBlobWriterTest(unittest.TestCase):
self.assertEqual(result.column('pic1').to_pylist()[0], pic1_data)
self.assertEqual(result.column('pic2').to_pylist()[0], pic2_data)
- def test_blob_stored_descriptor_fields_rejects_non_descriptor_input(self):
+ def test_blob_descriptor_fields_rejects_non_descriptor_input(self):
from pypaimon import Schema
pa_schema = pa.schema([
diff --git a/paimon-python/pypaimon/write/writer/data_blob_writer.py b/paimon-python/pypaimon/write/writer/data_blob_writer.py
index deaada4f21..62cbd013ec 100644
--- a/paimon-python/pypaimon/write/writer/data_blob_writer.py
+++ b/paimon-python/pypaimon/write/writer/data_blob_writer.py
@@ -82,9 +82,9 @@ class DataBlobWriter(DataWriter):
# Determine blob columns from table schema
self.blob_column_names = self._get_blob_columns_from_schema()
- self.blob_stored_descriptor_fields = CoreOptions.blob_stored_descriptor_fields(self.options)
+ self.blob_descriptor_fields = CoreOptions.blob_descriptor_fields(self.options)
- unknown_descriptor_fields = self.blob_stored_descriptor_fields.difference(
+ unknown_descriptor_fields = self.blob_descriptor_fields.difference(
set(self.blob_column_names)
)
if unknown_descriptor_fields:
@@ -95,7 +95,7 @@ class DataBlobWriter(DataWriter):
# Blob fields that should still be written to `.blob` files.
self.blob_file_column_names = [
- col for col in self.blob_column_names if col not in self.blob_stored_descriptor_fields
+ col for col in self.blob_column_names if col not in self.blob_descriptor_fields
]
all_column_names = self.table.field_names
@@ -132,7 +132,7 @@ class DataBlobWriter(DataWriter):
"stored columns: %s",
self.blob_column_names,
self.blob_file_column_names,
- sorted(self.blob_stored_descriptor_fields),
+ sorted(self.blob_descriptor_fields),
)
def _get_blob_columns_from_schema(self) -> List[str]:
@@ -219,12 +219,12 @@ class DataBlobWriter(DataWriter):
return normal_data, blob_data_map
def _validate_descriptor_stored_fields_input(self, data: pa.RecordBatch):
- if not self.blob_stored_descriptor_fields:
+ if not self.blob_descriptor_fields:
return
from pypaimon.table.row.blob import BlobDescriptor
- for field_name in self.blob_stored_descriptor_fields:
+ for field_name in self.blob_descriptor_fields:
if field_name not in data.schema.names:
continue
values = data.column(data.schema.get_field_index(field_name)).to_pylist()