This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ca1a0eb2dd GH-37217: [Python] Add missing docstrings to Cython (#37218)
ca1a0eb2dd is described below
commit ca1a0eb2dde5878275aa433384c38e33b4329260
Author: Dane Pitkin <[email protected]>
AuthorDate: Thu Aug 17 23:03:28 2023 -0400
GH-37217: [Python] Add missing docstrings to Cython (#37218)
### Rationale for this change
The Cython 3.0.0 upgrade (https://github.com/apache/arrow/pull/37097) triggers
numpydoc validation errors for these missing docstrings.
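For context, these errors can be reproduced locally with numpydoc's validation helper. A minimal sketch (the target method and the error codes it reports, e.g. `PR01` for undocumented parameters, are illustrative here, not the exact CI invocation):

```python
# Minimal sketch: ask numpydoc to validate a single docstring.
# Assumes numpydoc and pyarrow are installed.
from numpydoc.validate import validate

report = validate("pyarrow.csv.ReadOptions.equals")
for code, message in report["errors"]:
    print(code, message)
```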
### What changes are included in this PR?
* Docstrings added to Cython functions that omitted them
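
Because the docstrings are attached to Cython-implemented methods, they are introspectable at runtime like any Python docstring. A quick sketch of verifying one (the method chosen is just an example):

```python
import pyarrow.csv as csv

# The added numpydoc-style docstring can be read back via __doc__ or help().
print(csv.ReadOptions.equals.__doc__)
```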
### Are these changes tested?
Yes, locally.
### Are there any user-facing changes?
User-facing documentation is added.
* Closes: #37217
Lead-authored-by: Dane Pitkin <[email protected]>
Co-authored-by: Dane Pitkin <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
python/pyarrow/_compute.pyx | 9 +++
python/pyarrow/_csv.pyx | 27 ++++++++
python/pyarrow/_dataset.pyx | 74 ++++++++++++++++++++
python/pyarrow/_dataset_orc.pyx | 9 +++
python/pyarrow/_dataset_parquet.pyx | 41 +++++++++++
python/pyarrow/_fs.pyx | 9 +++
python/pyarrow/_json.pyx | 18 +++++
python/pyarrow/_parquet.pyx | 92 ++++++++++++++++++++++++
python/pyarrow/array.pxi | 33 +++++++++
python/pyarrow/gandiva.pyx | 135 ++++++++++++++++++++++++++++++++++++
python/pyarrow/io.pxi | 54 +++++++++++++++
python/pyarrow/scalar.pxi | 9 +++
python/pyarrow/table.pxi | 11 +++
python/pyarrow/tensor.pxi | 85 +++++++++++++++++++++++
python/pyarrow/types.pxi | 45 ++++++++++++
15 files changed, 651 insertions(+)
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index bc3b9e8c55..453f487c4d 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -2345,6 +2345,15 @@ cdef class Expression(_Weakrefable):
return self.expr
def equals(self, Expression other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.Expression
+
+ Returns
+ -------
+ bool
+ """
return self.expr.Equals(other.unwrap())
def __str__(self):
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 0b72f5249f..e532d8d8ab 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -290,6 +290,15 @@ cdef class ReadOptions(_Weakrefable):
check_status(deref(self.options).Validate())
def equals(self, ReadOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.csv.ReadOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.use_threads == other.use_threads and
self.block_size == other.block_size and
@@ -536,6 +545,15 @@ cdef class ParseOptions(_Weakrefable):
check_status(deref(self.options).Validate())
def equals(self, ParseOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.csv.ParseOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.delimiter == other.delimiter and
self.quote_char == other.quote_char and
@@ -1042,6 +1060,15 @@ cdef class ConvertOptions(_Weakrefable):
check_status(deref(self.options).Validate())
def equals(self, ConvertOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.csv.ConvertOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.check_utf8 == other.check_utf8 and
self.column_types == other.column_types and
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index badf6e4a4c..8f5688de29 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1992,9 +1992,27 @@ cdef class IpcFileFormat(FileFormat):
self.init(shared_ptr[CFileFormat](new CIpcFileFormat()))
def equals(self, IpcFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.IpcFileFormat
+
+ Returns
+ -------
+ bool
+ """
return True
def make_write_options(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ pyarrow.ipc.IpcWriteOptions
+ """
cdef IpcFileWriteOptions opts = \
<IpcFileWriteOptions> FileFormat.make_write_options(self)
opts.write_options = IpcWriteOptions(**kwargs)
@@ -2071,6 +2089,15 @@ cdef class CsvFileFormat(FileFormat):
self.csv_format = <CCsvFileFormat*> sp.get()
def make_write_options(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ pyarrow.csv.WriteOptions
+ """
cdef CsvFileWriteOptions opts = \
<CsvFileWriteOptions> FileFormat.make_write_options(self)
opts.write_options = WriteOptions(**kwargs)
@@ -2093,6 +2120,15 @@ cdef class CsvFileFormat(FileFormat):
super()._set_default_fragment_scan_options(options)
def equals(self, CsvFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.CsvFileFormat
+
+ Returns
+ -------
+ bool
+ """
return (
self.parse_options.equals(other.parse_options) and
self.default_fragment_scan_options ==
@@ -2165,6 +2201,15 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions):
make_streamwrap_func(read_options.encoding, 'utf-8'))
def equals(self, CsvFragmentScanOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.CsvFragmentScanOptions
+
+ Returns
+ -------
+ bool
+ """
return (
other and
self.convert_options.equals(other.convert_options) and
@@ -2250,6 +2295,15 @@ cdef class JsonFileFormat(FileFormat):
super()._set_default_fragment_scan_options(options)
def equals(self, JsonFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.JsonFileFormat
+
+ Returns
+ -------
+ bool
+ """
return (other and
self.default_fragment_scan_options ==
other.default_fragment_scan_options)
@@ -2308,6 +2362,15 @@ cdef class JsonFragmentScanOptions(FragmentScanOptions):
self.json_options.read_options = read_options.options
def equals(self, JsonFragmentScanOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.JsonFragmentScanOptions
+
+ Returns
+ -------
+ bool
+ """
return (
other and
self.read_options.equals(other.read_options) and
@@ -2353,6 +2416,17 @@ cdef class Partitioning(_Weakrefable):
return False
def parse(self, path):
+ """
+ Parse a path into a partition expression.
+
+ Parameters
+ ----------
+ path : str
+
+ Returns
+ -------
+ pyarrow.dataset.Expression
+ """
cdef CResult[CExpression] result
result = self.partitioning.Parse(tobytes(path))
return Expression.wrap(GetResultValue(result))
diff --git a/python/pyarrow/_dataset_orc.pyx b/python/pyarrow/_dataset_orc.pyx
index 40a21ef546..a8cce33622 100644
--- a/python/pyarrow/_dataset_orc.pyx
+++ b/python/pyarrow/_dataset_orc.pyx
@@ -32,6 +32,15 @@ cdef class OrcFileFormat(FileFormat):
self.init(shared_ptr[CFileFormat](new COrcFileFormat()))
def equals(self, OrcFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.OrcFileFormat
+
+ Returns
+ -------
+ bool
+ """
return True
@property
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index 4ad0caec30..4de396f4f5 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -178,6 +178,15 @@ cdef class ParquetFileFormat(FileFormat):
return parquet_read_options
def make_write_options(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ pyarrow.dataset.FileWriteOptions
+ """
opts = FileFormat.make_write_options(self)
(<ParquetFileWriteOptions> opts).update(**kwargs)
return opts
@@ -189,6 +198,15 @@ cdef class ParquetFileFormat(FileFormat):
super()._set_default_fragment_scan_options(options)
def equals(self, ParquetFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.ParquetFileFormat
+
+ Returns
+ -------
+ bool
+ """
return (
self.read_options.equals(other.read_options) and
self.default_fragment_scan_options ==
@@ -502,6 +520,15 @@ cdef class ParquetReadOptions(_Weakrefable):
self._coerce_int96_timestamp_unit = TimeUnit_NANO
def equals(self, ParquetReadOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.ParquetReadOptions
+
+ Returns
+ -------
+ bool
+ """
return (self.dictionary_columns == other.dictionary_columns and
self.coerce_int96_timestamp_unit ==
other.coerce_int96_timestamp_unit)
@@ -527,6 +554,11 @@ cdef class ParquetFileWriteOptions(FileWriteOptions):
object _properties
def update(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+ """
arrow_fields = {
"use_deprecated_int96_timestamps",
"coerce_timestamps",
@@ -720,6 +752,15 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
self.reader_properties().set_thrift_container_size_limit(size)
def equals(self, ParquetFragmentScanOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.ParquetFragmentScanOptions
+
+ Returns
+ -------
+ bool
+ """
attrs = (
self.use_buffered_stream, self.buffer_size, self.pre_buffer,
self.thrift_string_size_limit, self.thrift_container_size_limit)
diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx
index 69105afc2f..dbd7ebe5e4 100644
--- a/python/pyarrow/_fs.pyx
+++ b/python/pyarrow/_fs.pyx
@@ -505,6 +505,15 @@ cdef class FileSystem(_Weakrefable):
return self.wrapped
def equals(self, FileSystem other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.fs.FileSystem
+
+ Returns
+ -------
+ bool
+ """
return self.fs.Equals(other.unwrap())
def __eq__(self, other):
diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx
index 70cde6e23f..d36dad67ab 100644
--- a/python/pyarrow/_json.pyx
+++ b/python/pyarrow/_json.pyx
@@ -83,6 +83,15 @@ cdef class ReadOptions(_Weakrefable):
)
def equals(self, ReadOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.json.ReadOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.use_threads == other.use_threads and
self.block_size == other.block_size
@@ -212,6 +221,15 @@ cdef class ParseOptions(_Weakrefable):
self.options.unexpected_field_behavior = v
def equals(self, ParseOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.json.ParseOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.explicit_schema == other.explicit_schema and
self.newlines_in_values == other.newlines_in_values and
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 4448f359ac..50b4ed8e86 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -1183,6 +1183,22 @@ cdef class ParquetReader(_Weakrefable):
FileDecryptionProperties decryption_properties=None,
thrift_string_size_limit=None,
thrift_container_size_limit=None):
+ """
+ Open a parquet file for reading.
+
+ Parameters
+ ----------
+ source : str, pathlib.Path, pyarrow.NativeFile, or file-like object
+ use_memory_map : bool, default False
+ read_dictionary : iterable[int or str], optional
+ metadata : FileMetaData, optional
+ buffer_size : int, default 0
+ pre_buffer : bool, default False
+ coerce_int96_timestamp_unit : str, optional
+ decryption_properties : FileDecryptionProperties, optional
+ thrift_string_size_limit : int, optional
+ thrift_container_size_limit : int, optional
+ """
cdef:
shared_ptr[CFileMetaData] c_metadata
CReaderProperties properties = default_reader_properties()
@@ -1285,13 +1301,35 @@ cdef class ParquetReader(_Weakrefable):
return self.reader.get().num_row_groups()
def set_use_threads(self, bint use_threads):
+ """
+ Parameters
+ ----------
+ use_threads : bool
+ """
self.reader.get().set_use_threads(use_threads)
def set_batch_size(self, int64_t batch_size):
+ """
+ Parameters
+ ----------
+ batch_size : int64
+ """
self.reader.get().set_batch_size(batch_size)
def iter_batches(self, int64_t batch_size, row_groups, column_indices=None,
bint use_threads=True):
+ """
+ Parameters
+ ----------
+ batch_size : int64
+ row_groups : list[int]
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Yields
+ ------
+ next : RecordBatch
+ """
cdef:
vector[int] c_row_groups
vector[int] c_column_indices
@@ -1336,10 +1374,32 @@ cdef class ParquetReader(_Weakrefable):
def read_row_group(self, int i, column_indices=None,
bint use_threads=True):
+ """
+ Parameters
+ ----------
+ i : int
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
return self.read_row_groups([i], column_indices, use_threads)
def read_row_groups(self, row_groups not None, column_indices=None,
bint use_threads=True):
+ """
+ Parameters
+ ----------
+ row_groups : list[int]
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
cdef:
shared_ptr[CTable] ctable
vector[int] c_row_groups
@@ -1366,6 +1426,16 @@ cdef class ParquetReader(_Weakrefable):
return pyarrow_wrap_table(ctable)
def read_all(self, column_indices=None, bint use_threads=True):
+ """
+ Parameters
+ ----------
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
cdef:
shared_ptr[CTable] ctable
vector[int] c_column_indices
@@ -1387,6 +1457,16 @@ cdef class ParquetReader(_Weakrefable):
return pyarrow_wrap_table(ctable)
def scan_contents(self, column_indices=None, batch_size=65536):
+ """
+ Parameters
+ ----------
+ column_indices : list[int], optional
+ batch_size : int32, default 65536
+
+ Returns
+ -------
+ num_rows : int64
+ """
cdef:
vector[int] c_column_indices
int32_t c_batch_size
@@ -1434,6 +1514,18 @@ cdef class ParquetReader(_Weakrefable):
return self._column_idx_map[tobytes(column_name)]
def read_column(self, int column_index):
+ """
+ Read the column at the specified index.
+
+ Parameters
+ ----------
+ column_index : int
+ Index of the column.
+
+ Returns
+ -------
+ column : pyarrow.ChunkedArray
+ """
cdef shared_ptr[CChunkedArray] out
with nogil:
check_status(self.reader.get()
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 2f8959cd72..ce4eafd8e3 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1265,6 +1265,17 @@ cdef class Array(_PandasConvertible):
return frombytes(result, safe=True)
def format(self, **kwargs):
+ """
+ DEPRECATED, use pyarrow.Array.to_string
+
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ str
+ """
import warnings
warnings.warn('Array.format is deprecated, use Array.to_string')
return self.to_string(**kwargs)
@@ -1281,6 +1292,15 @@ cdef class Array(_PandasConvertible):
return NotImplemented
def equals(Array self, Array other not None):
+ """
+ Parameters
+ ----------
+ other : pyarrow.Array
+
+ Returns
+ -------
+ bool
+ """
return self.ap.Equals(deref(other.ap))
def __len__(self):
@@ -2336,6 +2356,19 @@ cdef class UnionArray(Array):
"""
def child(self, int pos):
+ """
+ DEPRECATED, use field() instead.
+
+ Parameters
+ ----------
+ pos : int
+ The physical index of the union child field (not its type code).
+
+ Returns
+ -------
+ field : pyarrow.Field
+ The given child field.
+ """
import warnings
warnings.warn("child is deprecated, use field", FutureWarning)
return self.field(pos)
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index cc46bc760f..35bbf5018f 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -191,6 +191,19 @@ cdef class Projector(_Weakrefable):
return self.projector.get().DumpIR().decode()
def evaluate(self, RecordBatch batch, SelectionVector selection=None):
+ """
+ Evaluate the specified record batch and return the projected arrays,
+ computed only at the selected positions when a selection vector is given.
+
+ Parameters
+ ----------
+ batch : pyarrow.RecordBatch
+ selection : pyarrow.gandiva.SelectionVector
+
+ Returns
+ -------
+ list[pyarrow.Array]
+ """
cdef vector[shared_ptr[CArray]] results
if selection is None:
check_status(self.projector.get().Evaluate(
@@ -227,6 +240,19 @@ cdef class Filter(_Weakrefable):
return self.filter.get().DumpIR().decode()
def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'):
+ """
+ Evaluate the specified record batch and return a selection vector.
+
+ Parameters
+ ----------
+ batch : pyarrow.RecordBatch
+ pool : MemoryPool
+ dtype : DataType or str, default int32
+
+ Returns
+ -------
+ pyarrow.gandiva.SelectionVector
+ """
cdef:
DataType type = ensure_type(dtype)
shared_ptr[CSelectionVector] selection
@@ -252,6 +278,18 @@ cdef class Filter(_Weakrefable):
cdef class TreeExprBuilder(_Weakrefable):
def make_literal(self, value, dtype):
+ """
+ Create a node for a literal value.
+
+ Parameters
+ ----------
+ value : a literal value
+ dtype : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef:
DataType type = ensure_type(dtype)
shared_ptr[CNode] r
@@ -289,6 +327,19 @@ cdef class TreeExprBuilder(_Weakrefable):
def make_expression(self, Node root_node not None,
Field return_field not None):
+ """
+ Create an expression with the specified root_node,
+ writing the result to return_field.
+
+ Parameters
+ ----------
+ root_node : pyarrow.gandiva.Node
+ return_field : pyarrow.Field
+
+ Returns
+ -------
+ pyarrow.gandiva.Expression
+ """
cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression(
root_node.node, return_field.sp_field)
cdef Expression expression = Expression()
@@ -296,6 +347,19 @@ cdef class TreeExprBuilder(_Weakrefable):
return expression
def make_function(self, name, children, DataType return_type):
+ """
+ Create a node with a function.
+
+ Parameters
+ ----------
+ name : str
+ children : list[pyarrow.gandiva.Node]
+ return_type : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
@@ -307,17 +371,53 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
def make_field(self, Field field not None):
+ """
+ Create a node with an Arrow field.
+
+ Parameters
+ ----------
+ field : pyarrow.Field
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
return Node.create(r)
def make_if(self, Node condition not None, Node this_node not None,
Node else_node not None, DataType return_type not None):
+ """
+ Create a node with an if-else expression.
+
+ Parameters
+ ----------
+ condition : pyarrow.gandiva.Node
+ this_node : pyarrow.gandiva.Node
+ else_node : pyarrow.gandiva.Node
+ return_type : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
condition.node, this_node.node, else_node.node,
return_type.sp_type)
return Node.create(r)
def make_and(self, children):
+ """
+ Create a Node with a boolean AND expression.
+
+ Parameters
+ ----------
+ children : list[pyarrow.gandiva.Node]
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
@@ -328,6 +428,17 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
def make_or(self, children):
+ """
+ Create a Node with a boolean OR expression.
+
+ Parameters
+ ----------
+ children : list[pyarrow.gandiva.Node]
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
@@ -420,6 +531,19 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
def make_in_expression(self, Node node not None, values, dtype):
+ """
+ Create a Node with an IN expression.
+
+ Parameters
+ ----------
+ node : pyarrow.gandiva.Node
+ values : iterable
+ dtype : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef DataType type = ensure_type(dtype)
if type.id == _Type_INT32:
@@ -444,6 +568,17 @@ cdef class TreeExprBuilder(_Weakrefable):
raise TypeError("Data type " + str(dtype) + " not supported.")
def make_condition(self, Node condition not None):
+ """
+ Create a condition with the specified node.
+
+ Parameters
+ ----------
+ condition : pyarrow.gandiva.Node
+
+ Returns
+ -------
+ pyarrow.gandiva.Condition
+ """
cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
condition.node)
return Condition.create(r)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 2a78f7e795..e3018ab470 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -575,6 +575,14 @@ cdef class NativeFile(_Weakrefable):
return line
def read_buffer(self, nbytes=None):
+ """
+ Read from the file and return the data as a Buffer.
+
+ Parameters
+ ----------
+ nbytes : int, optional
+ Maximum number of bytes to read.
+ """
cdef:
int64_t c_nbytes
int64_t bytes_read = 0
@@ -602,6 +610,14 @@ cdef class NativeFile(_Weakrefable):
raise UnsupportedOperation()
def writelines(self, lines):
+ """
+ Write lines to the file.
+
+ Parameters
+ ----------
+ lines : iterable
+ Iterable of bytes-like objects or exporters of the buffer protocol.
+ """
self._assert_writable()
for line in lines:
@@ -865,12 +881,35 @@ cdef class PythonFile(NativeFile):
self.is_writable = True
def truncate(self, pos=None):
+ """
+ Parameters
+ ----------
+ pos : int, optional
+ """
self.handle.truncate(pos)
def readline(self, size=None):
+ """
+ Read and return a line of bytes from the file.
+
+ If size is specified, read at most size bytes.
+
+ Parameters
+ ----------
+ size : int, optional
+ Maximum number of bytes to read.
+ """
return self.handle.readline(size)
def readlines(self, hint=None):
+ """
+ Read lines from the file.
+
+ Parameters
+ ----------
+ hint : int, optional
+ Maximum total number of bytes to read before stopping.
+ """
return self.handle.readlines(hint)
@@ -1146,16 +1185,31 @@ cdef class FixedSizeBufferWriter(NativeFile):
self.is_writable = True
def set_memcopy_threads(self, int num_threads):
+ """
+ Parameters
+ ----------
+ num_threads : int
+ """
cdef CFixedSizeBufferWriter* writer = \
<CFixedSizeBufferWriter*> self.output_stream.get()
writer.set_memcopy_threads(num_threads)
def set_memcopy_blocksize(self, int64_t blocksize):
+ """
+ Parameters
+ ----------
+ blocksize : int64
+ """
cdef CFixedSizeBufferWriter* writer = \
<CFixedSizeBufferWriter*> self.output_stream.get()
writer.set_memcopy_blocksize(blocksize)
def set_memcopy_threshold(self, int64_t threshold):
+ """
+ Parameters
+ ----------
+ threshold : int64
+ """
cdef CFixedSizeBufferWriter* writer = \
<CFixedSizeBufferWriter*> self.output_stream.get()
writer.set_memcopy_threshold(threshold)
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index aff1c311ab..e19807ba56 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -123,6 +123,15 @@ cdef class Scalar(_Weakrefable):
return str(self.as_py())
def equals(self, Scalar other not None):
+ """
+ Parameters
+ ----------
+ other : pyarrow.Scalar
+
+ Returns
+ -------
+ bool
+ """
return self.wrapped.get().Equals(other.unwrap().get()[0])
def __eq__(self, other):
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f08162089b..2eae38485d 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -160,6 +160,17 @@ cdef class ChunkedArray(_PandasConvertible):
return frombytes(result, safe=True)
def format(self, **kwargs):
+ """
+ DEPRECATED, use pyarrow.ChunkedArray.to_string
+
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ str
+ """
import warnings
warnings.warn('ChunkedArray.format is deprecated, '
'use ChunkedArray.to_string')
diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi
index ba732879aa..1afce7f4a1 100644
--- a/python/pyarrow/tensor.pxi
+++ b/python/pyarrow/tensor.pxi
@@ -316,6 +316,17 @@ shape: {0.shape}""".format(self)
def from_dense_numpy(cls, obj, dim_names=None):
"""
Convert numpy.ndarray to arrow::SparseCOOTensor
+
+ Parameters
+ ----------
+ obj : numpy.ndarray
+ The dense numpy array that should be converted.
+ dim_names : list[str], optional
+ Names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCOOTensor
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -549,6 +560,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
@@ -601,6 +624,10 @@ shape: {0.shape}""".format(self)
The dense numpy array that should be converted.
dim_names : list, optional
The names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCSRMatrix
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -781,6 +808,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
@@ -816,6 +855,17 @@ shape: {0.shape}""".format(self)
def from_dense_numpy(cls, obj, dim_names=None):
"""
Convert numpy.ndarray to arrow::SparseCSCMatrix
+
+ Parameters
+ ----------
+ obj : numpy.ndarray
+ The dense numpy array that should be converted.
+ dim_names : list[str], optional
+ Names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCSCMatrix
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -997,6 +1047,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
@@ -1040,6 +1102,17 @@ shape: {0.shape}""".format(self)
def from_dense_numpy(cls, obj, dim_names=None):
"""
Convert numpy.ndarray to arrow::SparseCSFTensor
+
+ Parameters
+ ----------
+ obj : numpy.ndarray
+ The dense numpy array that should be converted.
+ dim_names : list[str], optional
+ Names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCSFTensor
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -1190,6 +1263,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 12ad2fc4b6..f2dd59a0f1 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -199,6 +199,15 @@ cdef class DataType(_Weakrefable):
self.pep3118_format = _datatype_to_pep3118(self.type)
cpdef Field field(self, i):
+ """
+ Parameters
+ ----------
+ i : int
+
+ Returns
+ -------
+ pyarrow.Field
+ """
if not isinstance(i, int):
raise TypeError(f"Expected int index, got type '{type(i)}'")
cdef int index = <int> _normalize_index(i, self.type.num_fields())
@@ -1886,6 +1895,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
return self.wrapped
def equals(self, KeyValueMetadata other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.KeyValueMetadata
+
+ Returns
+ -------
+ bool
+ """
return self.metadata.Equals(deref(other.wrapped))
def __repr__(self):
@@ -1925,9 +1943,27 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
return KeyValueMetadata, (list(self.items()),)
def key(self, i):
+ """
+ Parameters
+ ----------
+ i : int
+
+ Returns
+ -------
+ bytes
+ """
return self.metadata.key(i)
def value(self, i):
+ """
+ Parameters
+ ----------
+ i : int
+
+ Returns
+ -------
+ bytes
+ """
return self.metadata.value(i)
def keys(self):
@@ -1943,6 +1979,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
yield (self.metadata.key(i), self.metadata.value(i))
def get_all(self, key):
+ """
+ Parameters
+ ----------
+ key : str
+
+ Returns
+ -------
+ list[bytes]
+ """
key = tobytes(key)
return [v for k, v in self.items() if k == key]