XiaoHongbo-Hope commented on code in PR #8021:
URL: https://github.com/apache/paimon/pull/8021#discussion_r3346446677


##########
paimon-python/pypaimon/schema/schema.py:
##########
@@ -133,3 +102,50 @@ def from_pyarrow_schema(pa_schema: pa.Schema, partition_keys: Optional[List[str]
                 )
 
         return Schema(fields, partition_keys, primary_keys, options, comment)
+
+    @staticmethod
+    def _validate_blob_fields(fields, options, primary_keys):
+        """Validate blob field configurations in the schema."""
+
+        if options is None:
+            options = {}
+
+        blob_field_names = {
+            field.name for field in fields if 'blob' in str(field.type).lower()
+        }
+        core_options = CoreOptions.from_dict(options)
+
+        descriptor_fields = core_options.blob_descriptor_fields()
+        view_fields = core_options.blob_view_fields()
+        unknown_inline_fields = descriptor_fields.union(view_fields).difference(blob_field_names)
+        if unknown_inline_fields:
+            raise ValueError(
+                "Fields in 'blob-descriptor-field' or 'blob-view-field' must be blob fields "
+                "in schema. Unknown fields: {}".format(sorted(unknown_inline_fields))
+            )
+
+        overlapping_inline_fields = descriptor_fields.intersection(view_fields)
+        if overlapping_inline_fields:
+            raise ValueError(
+                "Fields in 'blob-descriptor-field' and 'blob-view-field' must not overlap. "
+                "Overlapping fields: {}".format(sorted(overlapping_inline_fields))
+            )
+
+        required_options = {
+            CoreOptions.ROW_TRACKING_ENABLED.key(): 'true',
+            CoreOptions.DATA_EVOLUTION_ENABLED.key(): 'true'
+        }
+
+        missing_options = []
+        for key, expected_value in required_options.items():
+            if key not in options or options[key] != expected_value:
+                missing_options.append(f"{key}='{expected_value}'")
+
+        if missing_options:
+            raise ValueError(
+                f"Schema contains Blob type but is missing required options: {', '.join(missing_options)}. "
+                f"Please add these options to the schema."
+            )
+

Review Comment:
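   The required-options check above (`ROW_TRACKING_ENABLED` / `DATA_EVOLUTION_ENABLED`) seems to be applied even when the schema has no BLOB fields, i.e. when `blob_field_names` is empty.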



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to