jorisvandenbossche commented on code in PR #34980:
URL: https://github.com/apache/arrow/pull/34980#discussion_r1165656173
##########
python/pyarrow/table.pxi:
##########
@@ -1450,8 +1450,76 @@ cdef _sanitize_arrays(arrays, names, schema, metadata,
converted_arrays.append(item)
return converted_arrays
+cdef class _Table(_PandasConvertible):
+ """Internal: An interface for common table operations."""
-cdef class RecordBatch(_PandasConvertible):
+ def __init__(self):
+ raise TypeError("This object is not instantiable, "
+ "use a subclass instead.")
+
+ def drop_null(self):
+ """
+ Remove missing values from a Table.
Review Comment:
```suggestion
Remove missing values from a RecordBatch or Table.
```
Do we always want to mention both like this?
##########
python/pyarrow/lib.pxd:
##########
@@ -469,15 +469,19 @@ cdef class ChunkedArray(_PandasConvertible):
cdef getitem(self, int64_t i)
-cdef class Table(_PandasConvertible):
+cdef class _Table(_PandasConvertible):
Review Comment:
Do we maybe want to use "Base" in the name (to make it clearer that it is a
shared base class)? `_BaseTable` would be fine (or `_BaseTabular`, if that's a
better word to describe the commonality between RecordBatch and Table).
##########
python/pyarrow/table.pxi:
##########
@@ -1450,8 +1450,76 @@ cdef _sanitize_arrays(arrays, names, schema, metadata,
converted_arrays.append(item)
return converted_arrays
+cdef class _Table(_PandasConvertible):
+ """Internal: An interface for common table operations."""
-cdef class RecordBatch(_PandasConvertible):
+ def __init__(self):
+ raise TypeError("This object is not instantiable, "
+ "use a subclass instead.")
+
+ def drop_null(self):
+ """
+ Remove missing values from a Table.
Review Comment:
Or we could add a standard sentence like "The following example uses a Table,
but it works the same for RecordBatch". Or would that just be unnecessary noise
in most cases?
##########
python/pyarrow/table.pxi:
##########
@@ -1450,8 +1450,76 @@ cdef _sanitize_arrays(arrays, names, schema, metadata,
converted_arrays.append(item)
return converted_arrays
+cdef class _Table(_PandasConvertible):
+ """Internal: An interface for common table operations."""
-cdef class RecordBatch(_PandasConvertible):
+ def __init__(self):
+ raise TypeError("This object is not instantiable, "
+ "use a subclass instead.")
+
+ def drop_null(self):
+ """
+ Remove missing values from a Table.
Review Comment:
I think it's fine to just use a single example. We could always add a
comment like `# or pa.RecordBatch.from_pandas(df)` above the equivalent Table
line, to make it clear how to run the equivalent example with a RecordBatch
instead of a Table.
##########
python/pyarrow/table.pxi:
##########
@@ -1450,8 +1450,76 @@ cdef _sanitize_arrays(arrays, names, schema, metadata,
converted_arrays.append(item)
return converted_arrays
+cdef class _Table(_PandasConvertible):
+ """Internal: An interface for common table operations."""
-cdef class RecordBatch(_PandasConvertible):
+ def __init__(self):
+ raise TypeError("This object is not instantiable, "
+ "use a subclass instead.")
+
+ def drop_null(self):
+ """
+ Remove missing values from a Table.
+ See :func:`pyarrow.compute.drop_null` for full usage.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> import pandas as pd
+ >>> df = pd.DataFrame({'year': [None, 2022, 2019, 2021],
+ ... 'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", None, "Centipede"]})
+ >>> table = pa.Table.from_pandas(df)
+ >>> table.drop_null()
+ pyarrow.Table
+ year: double
+ n_legs: int64
+ animals: string
+ ----
+ year: [[2022,2021]]
+ n_legs: [[4,100]]
+ animals: [["Horse","Centipede"]]
+ """
+ return _pc().drop_null(self)
+
+ def take(self, object indices):
+ """
+ Select rows from the table.
+
+ See :func:`pyarrow.compute.take` for full usage.
+
+ Parameters
+ ----------
+ indices : Array or array-like
+ The indices in the table whose rows will be returned.
+
+ Returns
+ -------
+ taken : Table
Review Comment:
```suggestion
taken : RecordBatch or Table
```
##########
python/pyarrow/table.pxi:
##########
@@ -1450,8 +1450,76 @@ cdef _sanitize_arrays(arrays, names, schema, metadata,
converted_arrays.append(item)
return converted_arrays
+cdef class _Table(_PandasConvertible):
+ """Internal: An interface for common table operations."""
-cdef class RecordBatch(_PandasConvertible):
+ def __init__(self):
+ raise TypeError("This object is not instantiable, "
+ "use a subclass instead.")
+
+ def drop_null(self):
+ """
+ Remove missing values from a Table.
+ See :func:`pyarrow.compute.drop_null` for full usage.
+
+ Examples
+ --------
+ >>> import pyarrow as pa
+ >>> import pandas as pd
+ >>> df = pd.DataFrame({'year': [None, 2022, 2019, 2021],
+ ... 'n_legs': [2, 4, 5, 100],
+ ... 'animals': ["Flamingo", "Horse", None, "Centipede"]})
+ >>> table = pa.Table.from_pandas(df)
+ >>> table.drop_null()
+ pyarrow.Table
+ year: double
+ n_legs: int64
+ animals: string
+ ----
+ year: [[2022,2021]]
+ n_legs: [[4,100]]
+ animals: [["Horse","Centipede"]]
+ """
+ return _pc().drop_null(self)
+
+ def take(self, object indices):
+ """
+ Select rows from the table.
Review Comment:
```suggestion
Select rows from the RecordBatch or Table.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]