niyue commented on code in PR #13041:
URL: https://github.com/apache/arrow/pull/13041#discussion_r980795053
##########
python/pyarrow/tests/test_ipc.py:
##########
@@ -945,6 +945,62 @@ def test_ipc_zero_copy_numpy():
assert_frame_equal(df, rdf)
+@pytest.mark.pandas
+def test_ipc_batch_with_custom_metadata_roundtrip():
+ df = pd.DataFrame({'foo': [1.5]})
+
+ batch = pa.RecordBatch.from_pandas(df)
+ sink = pa.BufferOutputStream()
+
+ batch_count = 2
+ with pa.ipc.new_file(sink, batch.schema) as writer:
+ for i in range(batch_count):
+ writer.write_batch(batch, {"batch_id": str(i)})
+
+ buffer = sink.getvalue()
+ source = pa.BufferReader(buffer)
+
+ with pa.ipc.open_file(source) as reader:
+ batch_with_metas = [reader.get_batch_with_custom_metadata(
+ i) for i in range(reader.num_record_batches)]
+
+ for i in range(batch_count):
+ assert batch_with_metas[i].batch.num_rows == 1
+ assert batch_with_metas[i].custom_metadata == {"batch_id": str(i)}
Review Comment:
Assertion added.
##########
python/pyarrow/ipc.pxi:
##########
@@ -908,6 +958,32 @@ cdef class _RecordBatchFileReader(_Weakrefable):
# time has passed
get_record_batch = get_batch
+ def get_batch_with_custom_metadata(self, int i):
+ """
+ Read the record batch with the given index along with its custom
+ metadata
+
+ Parameters
+ ----------
+ i : int
+ The index of the record batch in the IPC file.
+
+ Returns
+ -------
+ batch : RecordBatch
+ custom_metadata : KeyValueMetadata or dict
Review Comment:
Fixed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]