kevinjqliu commented on issue #953:
URL: https://github.com/apache/iceberg-python/issues/953#issuecomment-2243766926
I was able to reproduce this on the latest main branch.
Example:
```
from pyiceberg.catalog.sql import SqlCatalog
import pyarrow as pa

# Nested struct types are named one by one so the top-level schema reads flat.
address_type = pa.struct([
    ("street", pa.string()),
    ("city", pa.string()),
    ("postal_code", pa.string()),
])
contact_type = pa.struct([
    ("email", pa.string()),
    ("phone", pa.string()),
])
company_type = pa.struct([
    ("name", pa.string()),
    ("location", pa.string()),
])
employment_type = pa.struct([
    pa.field("status", pa.string(), nullable=True),
    pa.field("position", pa.string(), nullable=True),
    pa.field("company", company_type, nullable=True),
])
notifications_type = pa.struct([
    ("email", pa.bool_()),
    ("sms", pa.bool_()),
])
preferences_type = pa.struct([
    ("newsletter", pa.bool_()),
    ("notifications", notifications_type),
])

schema = pa.schema([
    ("id", pa.int32()),
    ("name", pa.string()),
    ("age", pa.int32()),
    ("address", address_type),
    ("contact", contact_type),
    ("employment", employment_type),
    ("preferences", preferences_type),
])

# In-memory SQLite catalog with the current directory as the warehouse.
catalog = SqlCatalog("default", uri="sqlite:///:memory:", warehouse=".")
catalog.create_namespace("foo")
table = catalog.create_table("foo.bar", schema=schema)


def scan_selected(**scan_kwargs):
    """Return a fresh scan of ``table`` restricted to the four fields under test.

    Any extra keyword arguments (e.g. ``row_filter``) are forwarded to
    ``table.scan`` unchanged.
    """
    return table.scan(
        selected_fields=["age", "employment", "contact.email", "employment.status"],
        **scan_kwargs,
    )


# Only selected_fields, no row filter: both calls work.
scan_selected().projection()
scan_selected().to_pandas()

# Row filter on a top-level column: both calls work.
scan_selected(row_filter="age = '1'").projection()
scan_selected(row_filter="age = '1'").to_pandas()

# Row filter on a nested field: projection() works ...
scan_selected(row_filter="employment.status = 'Employed'").projection()

# ... but to_pandas() errors with
# "ValueError: Could not find field with name status, case_sensitive=True".
scan_selected(row_filter="employment.status = 'Employed'").to_pandas()
```
Stack trace:
```
>>> table.scan(row_filter="employment.status = 'Employed'",
selected_fields=["age", "employment", 'contact.email',
'employment.status']).to_pandas()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/kevinliu/repos/iceberg-python/pyiceberg/table/__init__.py",
line 2033, in to_pandas
return self.to_arrow().to_pandas(**kwargs)
^^^^^^^^^^^^^^^
File "/Users/kevinliu/repos/iceberg-python/pyiceberg/table/__init__.py",
line 2003, in to_arrow
return project_table(
^^^^^^^^^^^^^^
File "/Users/kevinliu/repos/iceberg-python/pyiceberg/io/pyarrow.py", line
1169, in project_table
bound_row_filter = bind(table_metadata.schema(), row_filter,
case_sensitive=case_sensitive)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/kevinliu/repos/iceberg-python/pyiceberg/expressions/visitors.py", line
213, in bind
return visit(expression, BindVisitor(schema, case_sensitive))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/kevinliu/.pyenv/versions/3.11.0/lib/python3.11/functools.py",
line 909, in wrapper
return dispatch(args[0].__class__)(*args, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/kevinliu/repos/iceberg-python/pyiceberg/expressions/visitors.py", line
185, in _
return visitor.visit_unbound_predicate(predicate=obj)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/kevinliu/repos/iceberg-python/pyiceberg/expressions/visitors.py", line
250, in visit_unbound_predicate
return predicate.bind(self.schema, case_sensitive=self.case_sensitive)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/kevinliu/repos/iceberg-python/pyiceberg/expressions/__init__.py", line
671, in bind
bound_term = self.term.bind(schema, case_sensitive)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/Users/kevinliu/repos/iceberg-python/pyiceberg/expressions/__init__.py", line
182, in bind
field = schema.find_field(name_or_id=self.name,
case_sensitive=case_sensitive)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/kevinliu/repos/iceberg-python/pyiceberg/schema.py", line 215,
in find_field
raise ValueError(f"Could not find field with name {name_or_id},
case_sensitive={case_sensitive}")
ValueError: Could not find field with name status, case_sensitive=True
>>>
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]