This is an automated email from the ASF dual-hosted git repository.
honahx pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new b11cdb54 Deprecate to_requested_schema (#918)
b11cdb54 is described below
commit b11cdb54b1a05cce0ade34af4ce81a94c34b2650
Author: Sung Yun <[email protected]>
AuthorDate: Fri Jul 12 16:45:04 2024 -0400
Deprecate to_requested_schema (#918)
* deprecate to_requested_schema
* prep for release
---
mkdocs/docs/how-to-release.md | 15 +++++++++++++++
pyiceberg/io/pyarrow.py | 20 ++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md
index 99baec25..4824cb99 100644
--- a/mkdocs/docs/how-to-release.md
+++ b/mkdocs/docs/how-to-release.md
@@ -23,6 +23,21 @@ The guide to release PyIceberg.
The first step is to publish a release candidate (RC) and publish it to the
public for testing and validation. Once the vote has passed on the RC, the RC
turns into the new release.
+## Preparing for a release
+
+Before running the release candidate, we want to remove any APIs that were marked for removal under the @deprecated tag for this release.
+
+For example, the API with the following deprecation tag should be removed when preparing for the 0.2.0 release.
+
+```python
+
+@deprecated(
+ deprecated_in="0.1.0",
+ removed_in="0.2.0",
+ help_message="Please use load_something_else() instead",
+)
+```
+
## Running a release candidate
Make sure that the version is correct in `pyproject.toml` and
`pyiceberg/__init__.py`. Correct means that it reflects the version that you
want to release.
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 1ef9fc9b..199133f7 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -156,6 +156,7 @@ from pyiceberg.types import (
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
+from pyiceberg.utils.deprecated import deprecated
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1279,6 +1280,23 @@ def project_batches(
total_row_count += len(batch)
+@deprecated(
+ deprecated_in="0.7.0",
+ removed_in="0.8.0",
+ help_message="The public API for 'to_requested_schema' is deprecated and is replaced by '_to_requested_schema'",
+)
+def to_requested_schema(requested_schema: Schema, file_schema: Schema, table: pa.Table) -> pa.Table:
+ struct_array = visit_with_partner(requested_schema, table, ArrowProjectionVisitor(file_schema), ArrowAccessor(file_schema))
+
+ arrays = []
+ fields = []
+ for pos, field in enumerate(requested_schema.fields):
+ array = struct_array.field(pos)
+ arrays.append(array)
+ fields.append(pa.field(field.name, array.type, field.optional))
+ return pa.Table.from_arrays(arrays, schema=pa.schema(fields))
+
+
def _to_requested_schema(
requested_schema: Schema,
file_schema: Schema,
@@ -1434,6 +1452,8 @@ class ArrowAccessor(PartnerAccessor[pa.Array]):
if isinstance(partner_struct, pa.StructArray):
return partner_struct.field(name)
+ elif isinstance(partner_struct, pa.Table):
+ return partner_struct.column(name).combine_chunks()
elif isinstance(partner_struct, pa.RecordBatch):
return partner_struct.column(name)
else: