jorisvandenbossche commented on code in PR #39506:
URL: https://github.com/apache/arrow/pull/39506#discussion_r1511160754
##########
python/pyarrow/table.pxi:
##########
@@ -2483,6 +2549,254 @@ cdef class RecordBatch(_Tabular):
def __sizeof__(self):
return super(RecordBatch, self).__sizeof__() + self.nbytes
+ def add_column(self, int i, field_, column):
+ """
+ Add column to RecordBatch at position.
+
+ A new record batch is returned with the column added, the original record batch
+ object is left unchanged.
+
+ Parameters
+ ----------
+ i : int
+ Index to place the column at.
+ field_ : str or Field
+ If a string is passed then the type is deduced from the column
+ data.
+ column : Array or value coercible to array
+ Column data.
+
+ Returns
+ -------
+ RecordBatch
+ New record batch with the passed column added.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> import pandas as pd
+ >>> df = pd.DataFrame({'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
+ >>> batch = pa.RecordBatch.from_pandas(df)
+
+ Add column:
+
+ >>> year = [2021, 2022, 2019, 2021]
+ >>> batch.add_column(0,"year", year)
+ pyarrow.RecordBatch
+ year: int64
+ n_legs: int64
+ animals: string
+ ----
+ year: [2021,2022,2019,2021]
+ n_legs: [2,4,5,100]
+ animals: ["Flamingo","Horse","Brittle stars","Centipede"]
+
+ Original record batch is left unchanged:
+
+ >>> batch
+ pyarrow.RecordBatch
+ n_legs: int64
+ animals: string
+ ----
+ n_legs: [2,4,5,100]
+ animals: ["Flamingo","Horse","Brittle stars","Centipede"]
+ """
+ cdef:
+ shared_ptr[CRecordBatch] c_batch
+ Field c_field
+ Array c_arr
+
+ if isinstance(column, Array):
+ c_arr = column
+ else:
+ c_arr = array(column)
+
+ if isinstance(field_, Field):
+ c_field = field_
+ else:
+ c_field = field(field_, c_arr.type)
+
+ with nogil:
+ c_batch = GetResultValue(self.batch.AddColumn(
+ i, c_field.sp_field, c_arr.sp_array))
+
+ return pyarrow_wrap_batch(c_batch)
+
+ def append_column(self, field_, column):
Review Comment:
> The reason that I didn't do that is that it directly depends on having an
> .add_column method which is not implemented on the base class.
We already have a similar pattern that you added in this PR: the shared
`drop_columns` depends on `remove_column`. So generally I think that kind of
relationship is OK.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]