kosiew commented on code in PR #1247:
URL:
https://github.com/apache/datafusion-python/pull/1247#discussion_r2370917745
##########
python/datafusion/context.py:
##########
@@ -619,6 +692,138 @@ def sql_with_options(self, query: str, options:
SQLOptions) -> DataFrame:
"""
return self.sql(query, options)
@staticmethod
def _extract_missing_table_names(err: Exception) -> list[str]:
    """Return the bare names of the tables that ``err`` reports as missing.

    Names are read from ``err.missing_table_names`` when that attribute is
    present and not ``None``; it may be a single string, an iterable of
    names, or any other single value. Otherwise the names are scraped from
    ``str(err)`` using the ``table '<name>' not found`` and
    ``No table named '<name>'`` message shapes.

    Qualified names are reduced to the part after the last ``.``, falsy
    entries are dropped, and duplicates are removed while keeping first-seen
    order, so the result is deterministic from run to run.

    Args:
        err: The exception to inspect.

    Returns:
        The unqualified table names, or an empty list when none are found.
    """

    def _normalize(names: list[Any]) -> list[str]:
        # A dict is used as an ordered set: qualified and bare spellings of
        # the same table collapse to one entry, in first-seen order.
        tables: dict[str, None] = {}
        for raw_name in names:
            if not raw_name:
                continue
            tables[str(raw_name).rsplit(".", 1)[-1]] = None
        return list(tables)

    missing_tables = getattr(err, "missing_table_names", None)
    if missing_tables is not None:
        if isinstance(missing_tables, str):
            # A lone string must not be iterated character by character.
            candidates: list[Any] = [missing_tables]
        else:
            try:
                candidates = list(missing_tables)
            except TypeError:
                # Not iterable: treat the value as a single name.
                candidates = [missing_tables]

        return _normalize(candidates)

    message = str(err)
    # Collect into a list rather than a set so the order of the result does
    # not depend on string hashing.
    matches: list[str] = []
    for pattern in (r"table '([^']+)' not found", r"No table named '([^']+)'"):
        matches.extend(re.findall(pattern, message))

    return _normalize(matches)
+
def _register_python_tables(self, tables: list[str]) -> bool:
    """Try to register a Python object for each not-yet-registered table name.

    Empty names and names for which ``self.table_exist`` is true are
    ignored. For the rest, ``self._lookup_python_object`` supplies a
    candidate object, which is passed to ``self._register_python_object``.

    Args:
        tables: Table names to resolve.

    Returns:
        ``True`` if at least one registration call reported success,
        ``False`` otherwise.
    """
    registered_any = False
    for table_name in tables:
        if table_name and not self.table_exist(table_name):
            python_obj = self._lookup_python_object(table_name)
            if python_obj is not None and self._register_python_object(
                table_name, python_obj
            ):
                registered_any = True

    return registered_any
+
+ @staticmethod
+ def _lookup_python_object(name: str) -> Any | None:
+ frame = inspect.currentframe()
+ try:
+ frame = frame.f_back if frame is not None else None
+ lower_name = name.lower()
+
+ def _match(mapping: dict[str, Any]) -> Any | None:
+ value = mapping.get(name)
+ if value is not None:
+ return value
+
+ for key, candidate in mapping.items():
+ if (
+ isinstance(key, str)
+ and key.lower() == lower_name
+ and candidate is not None
+ ):
+ return candidate
+
+ return None
+
+ while frame is not None:
+ for scope in (frame.f_locals, frame.f_globals):
+ match = _match(scope)
+ if match is not None:
+ return match
+ frame = frame.f_back
+ finally:
+ del frame
+ return None
+
def _refresh_python_table_bindings(self) -> None:
    """Re-sync recorded table bindings with the objects now found by name.

    ``self._python_table_bindings`` (treated as empty when the attribute is
    missing) maps a table name to an ``(obj_ref, cached_id)`` pair, where
    ``obj_ref`` is either ``None`` or a callable returning the bound object
    or ``None`` (presumably a ``weakref.ref`` - confirm where it is stored).

    A binding is stale when its reference no longer yields an object, or
    when ``self._lookup_python_object`` finds an object whose ``id`` differs
    from ``cached_id``. A stale table is deregistered and then re-registered
    from the object currently found. If there is no such object, or
    registration reports failure, the binding entry is removed.
    """
    bindings = getattr(self, "_python_table_bindings", {})
    # Iterate over a snapshot because entries may be popped along the way.
    for table_name, (obj_ref, cached_id) in list(bindings.items()):
        cached_obj = None if obj_ref is None else obj_ref()
        current_obj = self._lookup_python_object(table_name)
        weakref_dead = cached_obj is None and obj_ref is not None
        id_mismatch = current_obj is not None and id(current_obj) != cached_id

        if weakref_dead or id_mismatch:
            self.deregister_table(table_name)

            if current_obj is None or not self._register_python_object(
                table_name, current_obj
            ):
                bindings.pop(table_name, None)
+
+ def _register_python_object(self, name: str, obj: Any) -> bool:
+ registered = False
+
+ if isinstance(obj, DataFrame):
+ self.register_view(name, obj)
+ registered = True
+ elif (
+ obj.__class__.__module__.startswith("polars.")
+ and obj.__class__.__name__ == "DataFrame"
+ ):
+ self.from_polars(obj, name=name)
+ registered = True
+ elif (
+ obj.__class__.__module__.startswith("pandas.")
+ and obj.__class__.__name__ == "DataFrame"
+ ):
+ self.from_pandas(obj, name=name)
+ registered = True
+ elif isinstance(obj, (pa.Table, pa.RecordBatch, pa.RecordBatchReader))
or (
+ hasattr(obj, "__arrow_c_stream__") or hasattr(obj,
"__arrow_c_array__")
+ ):
+ self.from_arrow(obj, name=name)
+ registered = True
Review Comment:
Good point.
I will reorder the checks so that
```python
hasattr(obj, "__arrow_c_stream__")
```
is tested first, before falling back to the module-name checks.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]