kerwin-zk commented on code in PR #8029:
URL: https://github.com/apache/paimon/pull/8029#discussion_r3323545088


##########
paimon-python/pypaimon/daft/daft_datasource.py:
##########
@@ -63,6 +64,113 @@ class _ReadPushdownState:
     source_limit: int | None
 
 
+def _options_to_dict(options: Any) -> dict[str, Any]:
+    if options is None:
+        return {}
+    if isinstance(options, dict):
+        return dict(options)
+
+    to_map = getattr(options, "to_map", None)
+    if callable(to_map):
+        return dict(to_map())
+
+    data = getattr(options, "data", None)
+    if isinstance(data, dict):
+        return dict(data)
+
+    return {}
+
+
+def _extract_catalog_options(table: FileStoreTable) -> dict[str, Any]:
+    file_io = getattr(table, "file_io", None)
+    properties = getattr(file_io, "properties", None)
+    if properties is None:
+        properties = getattr(file_io, "catalog_options", None)
+    return _options_to_dict(properties)
+
+
+def _extract_identifier(table: FileStoreTable) -> _PaimonIdentifier | None:
+    identifier = getattr(table, "identifier", None)
+    if identifier is None:
+        return None
+
+    get_database_name = getattr(identifier, "get_database_name", None)
+    get_table_name = getattr(identifier, "get_table_name", None)
+    get_branch_name = getattr(identifier, "get_branch_name", None)
+
+    database_name = (
+        get_database_name()
+        if callable(get_database_name)
+        else getattr(identifier, "database", None)
+    )
+    table_name = (
+        get_table_name()
+        if callable(get_table_name)
+        else getattr(identifier, "object", None)
+    )
+    branch_name = (
+        get_branch_name()
+        if callable(get_branch_name)
+        else getattr(identifier, "branch", None)
+    )
+    if database_name is None or table_name is None:
+        return None
+    return database_name, table_name, branch_name
+
+
+def _extract_table_options(table: FileStoreTable) -> dict[str, Any]:
+    table_schema = getattr(table, "table_schema", None)

Review Comment:
   Done. Added `FileStoreTable.schema()` returning the `TableSchema`, and 
`_extract_table_options` now uses `table.schema().options`.



##########
paimon-python/pypaimon/daft/daft_datasource.py:
##########
@@ -277,7 +458,12 @@ async def get_tasks(self, pushdowns: Pushdowns) -> AsyncIterator[DataSourceTask]
                 and not has_deletion_vectors
             )
 
-            if can_use_native_reader:
+            use_paimon_reader_task = (

Review Comment:
   Added detailed comments above the reader-selection logic in `get_tasks`, 
describing when the Daft native Parquet reader is used and when the pypaimon 
reader task is required (non-Parquet, blob columns, LSM merge, or deletion 
vectors).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to