This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ca1a0eb2dd GH-37217: [Python] Add missing docstrings to Cython (#37218)
ca1a0eb2dd is described below
commit ca1a0eb2dde5878275aa433384c38e33b4329260
Author: Dane Pitkin <[email protected]>
AuthorDate: Thu Aug 17 23:03:28 2023 -0400
GH-37217: [Python] Add missing docstrings to Cython (#37218)
### Rationale for this change
The Cython 3.0.0 upgrade (https://github.com/apache/arrow/pull/37097) triggers
numpydoc validation errors for these missing docstrings.
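For context, these errors can be reproduced locally with numpydoc's validation helper. A minimal sketch (the target method and the error codes it reports, e.g. `PR01` for undocumented parameters, are illustrative here, not the exact CI invocation):

```python
# Minimal sketch: ask numpydoc to validate a single docstring.
# Assumes numpydoc and pyarrow are installed.
from numpydoc.validate import validate

report = validate("pyarrow.csv.ReadOptions.equals")
for code, message in report["errors"]:
    print(code, message)
```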
### What changes are included in this PR?
* Docstrings added to Cython functions that omitted them
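
Because the docstrings are attached to Cython-implemented methods, they are introspectable at runtime like any Python docstring. A quick sketch of verifying one (the method chosen is just an example):

```python
import pyarrow.csv as csv

# The added numpydoc-style docstring can be read back via __doc__ or help().
print(csv.ReadOptions.equals.__doc__)
```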
### Are these changes tested?
Yes, locally.
### Are there any user-facing changes?
User-facing documentation is added.
* Closes: #37217
Lead-authored-by: Dane Pitkin <[email protected]>
Co-authored-by: Dane Pitkin <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
python/pyarrow/_compute.pyx | 9 +++
python/pyarrow/_csv.pyx | 27 ++++++++
python/pyarrow/_dataset.pyx | 74 ++++++++++++++++++++
python/pyarrow/_dataset_orc.pyx | 9 +++
python/pyarrow/_dataset_parquet.pyx | 41 +++++++++++
python/pyarrow/_fs.pyx | 9 +++
python/pyarrow/_json.pyx | 18 +++++
python/pyarrow/_parquet.pyx | 92 ++++++++++++++++++++++++
python/pyarrow/array.pxi | 33 +++++++++
python/pyarrow/gandiva.pyx | 135 ++++++++++++++++++++++++++++++++++++
python/pyarrow/io.pxi | 54 +++++++++++++++
python/pyarrow/scalar.pxi | 9 +++
python/pyarrow/table.pxi | 11 +++
python/pyarrow/tensor.pxi | 85 +++++++++++++++++++++++
python/pyarrow/types.pxi | 45 ++++++++++++
15 files changed, 651 insertions(+)
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index bc3b9e8c55..453f487c4d 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -2345,6 +2345,15 @@ cdef class Expression(_Weakrefable):
return self.expr
def equals(self, Expression other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.Expression
+
+ Returns
+ -------
+ bool
+ """
return self.expr.Equals(other.unwrap())
def __str__(self):
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index 0b72f5249f..e532d8d8ab 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -290,6 +290,15 @@ cdef class ReadOptions(_Weakrefable):
check_status(deref(self.options).Validate())
def equals(self, ReadOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.csv.ReadOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.use_threads == other.use_threads and
self.block_size == other.block_size and
@@ -536,6 +545,15 @@ cdef class ParseOptions(_Weakrefable):
check_status(deref(self.options).Validate())
def equals(self, ParseOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.csv.ParseOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.delimiter == other.delimiter and
self.quote_char == other.quote_char and
@@ -1042,6 +1060,15 @@ cdef class ConvertOptions(_Weakrefable):
check_status(deref(self.options).Validate())
def equals(self, ConvertOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.csv.ConvertOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.check_utf8 == other.check_utf8 and
self.column_types == other.column_types and
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index badf6e4a4c..8f5688de29 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -1992,9 +1992,27 @@ cdef class IpcFileFormat(FileFormat):
self.init(shared_ptr[CFileFormat](new CIpcFileFormat()))
def equals(self, IpcFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.IpcFileFormat
+
+ Returns
+ -------
+ bool
+ """
return True
def make_write_options(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ pyarrow.ipc.IpcWriteOptions
+ """
cdef IpcFileWriteOptions opts = \
<IpcFileWriteOptions> FileFormat.make_write_options(self)
opts.write_options = IpcWriteOptions(**kwargs)
@@ -2071,6 +2089,15 @@ cdef class CsvFileFormat(FileFormat):
self.csv_format = <CCsvFileFormat*> sp.get()
def make_write_options(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ pyarrow.csv.WriteOptions
+ """
cdef CsvFileWriteOptions opts = \
<CsvFileWriteOptions> FileFormat.make_write_options(self)
opts.write_options = WriteOptions(**kwargs)
@@ -2093,6 +2120,15 @@ cdef class CsvFileFormat(FileFormat):
super()._set_default_fragment_scan_options(options)
def equals(self, CsvFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.CsvFileFormat
+
+ Returns
+ -------
+ bool
+ """
return (
self.parse_options.equals(other.parse_options) and
self.default_fragment_scan_options ==
@@ -2165,6 +2201,15 @@ cdef class CsvFragmentScanOptions(FragmentScanOptions):
make_streamwrap_func(read_options.encoding, 'utf-8'))
def equals(self, CsvFragmentScanOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.CsvFragmentScanOptions
+
+ Returns
+ -------
+ bool
+ """
return (
other and
self.convert_options.equals(other.convert_options) and
@@ -2250,6 +2295,15 @@ cdef class JsonFileFormat(FileFormat):
super()._set_default_fragment_scan_options(options)
def equals(self, JsonFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.JsonFileFormat
+
+ Returns
+ -------
+ bool
+ """
return (other and
self.default_fragment_scan_options ==
other.default_fragment_scan_options)
@@ -2308,6 +2362,15 @@ cdef class JsonFragmentScanOptions(FragmentScanOptions):
self.json_options.read_options = read_options.options
def equals(self, JsonFragmentScanOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.JsonFragmentScanOptions
+
+ Returns
+ -------
+ bool
+ """
return (
other and
self.read_options.equals(other.read_options) and
@@ -2353,6 +2416,17 @@ cdef class Partitioning(_Weakrefable):
return False
def parse(self, path):
+ """
+ Parse a path into a partition expression.
+
+ Parameters
+ ----------
+ path : str
+
+ Returns
+ -------
+ pyarrow.dataset.Expression
+ """
cdef CResult[CExpression] result
result = self.partitioning.Parse(tobytes(path))
return Expression.wrap(GetResultValue(result))
diff --git a/python/pyarrow/_dataset_orc.pyx b/python/pyarrow/_dataset_orc.pyx
index 40a21ef546..a8cce33622 100644
--- a/python/pyarrow/_dataset_orc.pyx
+++ b/python/pyarrow/_dataset_orc.pyx
@@ -32,6 +32,15 @@ cdef class OrcFileFormat(FileFormat):
self.init(shared_ptr[CFileFormat](new COrcFileFormat()))
def equals(self, OrcFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.OrcFileFormat
+
+ Returns
+ -------
+ bool
+ """
return True
@property
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index 4ad0caec30..4de396f4f5 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -178,6 +178,15 @@ cdef class ParquetFileFormat(FileFormat):
return parquet_read_options
def make_write_options(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ pyarrow.dataset.FileWriteOptions
+ """
opts = FileFormat.make_write_options(self)
(<ParquetFileWriteOptions> opts).update(**kwargs)
return opts
@@ -189,6 +198,15 @@ cdef class ParquetFileFormat(FileFormat):
super()._set_default_fragment_scan_options(options)
def equals(self, ParquetFileFormat other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.ParquetFileFormat
+
+ Returns
+ -------
+ bool
+ """
return (
self.read_options.equals(other.read_options) and
self.default_fragment_scan_options ==
@@ -502,6 +520,15 @@ cdef class ParquetReadOptions(_Weakrefable):
self._coerce_int96_timestamp_unit = TimeUnit_NANO
def equals(self, ParquetReadOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.ParquetReadOptions
+
+ Returns
+ -------
+ bool
+ """
return (self.dictionary_columns == other.dictionary_columns and
self.coerce_int96_timestamp_unit ==
other.coerce_int96_timestamp_unit)
@@ -527,6 +554,11 @@ cdef class ParquetFileWriteOptions(FileWriteOptions):
object _properties
def update(self, **kwargs):
+ """
+ Parameters
+ ----------
+ **kwargs : dict
+ """
arrow_fields = {
"use_deprecated_int96_timestamps",
"coerce_timestamps",
@@ -720,6 +752,15 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
self.reader_properties().set_thrift_container_size_limit(size)
def equals(self, ParquetFragmentScanOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.dataset.ParquetFragmentScanOptions
+
+ Returns
+ -------
+ bool
+ """
attrs = (
self.use_buffered_stream, self.buffer_size, self.pre_buffer,
self.thrift_string_size_limit, self.thrift_container_size_limit)
diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx
index 69105afc2f..dbd7ebe5e4 100644
--- a/python/pyarrow/_fs.pyx
+++ b/python/pyarrow/_fs.pyx
@@ -505,6 +505,15 @@ cdef class FileSystem(_Weakrefable):
return self.wrapped
def equals(self, FileSystem other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.fs.FileSystem
+
+ Returns
+ -------
+ bool
+ """
return self.fs.Equals(other.unwrap())
def __eq__(self, other):
diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx
index 70cde6e23f..d36dad67ab 100644
--- a/python/pyarrow/_json.pyx
+++ b/python/pyarrow/_json.pyx
@@ -83,6 +83,15 @@ cdef class ReadOptions(_Weakrefable):
)
def equals(self, ReadOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.json.ReadOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.use_threads == other.use_threads and
self.block_size == other.block_size
@@ -212,6 +221,15 @@ cdef class ParseOptions(_Weakrefable):
self.options.unexpected_field_behavior = v
def equals(self, ParseOptions other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.json.ParseOptions
+
+ Returns
+ -------
+ bool
+ """
return (
self.explicit_schema == other.explicit_schema and
self.newlines_in_values == other.newlines_in_values and
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 4448f359ac..50b4ed8e86 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -1183,6 +1183,22 @@ cdef class ParquetReader(_Weakrefable):
FileDecryptionProperties decryption_properties=None,
thrift_string_size_limit=None,
thrift_container_size_limit=None):
+ """
+ Open a parquet file for reading.
+
+ Parameters
+ ----------
+ source : str, pathlib.Path, pyarrow.NativeFile, or file-like object
+ use_memory_map : bool, default False
+ read_dictionary : iterable[int or str], optional
+ metadata : FileMetaData, optional
+ buffer_size : int, default 0
+ pre_buffer : bool, default False
+ coerce_int96_timestamp_unit : str, optional
+ decryption_properties : FileDecryptionProperties, optional
+ thrift_string_size_limit : int, optional
+ thrift_container_size_limit : int, optional
+ """
cdef:
shared_ptr[CFileMetaData] c_metadata
CReaderProperties properties = default_reader_properties()
@@ -1285,13 +1301,35 @@ cdef class ParquetReader(_Weakrefable):
return self.reader.get().num_row_groups()
def set_use_threads(self, bint use_threads):
+ """
+ Parameters
+ ----------
+ use_threads : bool
+ """
self.reader.get().set_use_threads(use_threads)
def set_batch_size(self, int64_t batch_size):
+ """
+ Parameters
+ ----------
+ batch_size : int64
+ """
self.reader.get().set_batch_size(batch_size)
def iter_batches(self, int64_t batch_size, row_groups, column_indices=None,
bint use_threads=True):
+ """
+ Parameters
+ ----------
+ batch_size : int64
+ row_groups : list[int]
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Yields
+ ------
+ next : RecordBatch
+ """
cdef:
vector[int] c_row_groups
vector[int] c_column_indices
@@ -1336,10 +1374,32 @@ cdef class ParquetReader(_Weakrefable):
def read_row_group(self, int i, column_indices=None,
bint use_threads=True):
+ """
+ Parameters
+ ----------
+ i : int
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
return self.read_row_groups([i], column_indices, use_threads)
def read_row_groups(self, row_groups not None, column_indices=None,
bint use_threads=True):
+ """
+ Parameters
+ ----------
+ row_groups : list[int]
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
cdef:
shared_ptr[CTable] ctable
vector[int] c_row_groups
@@ -1366,6 +1426,16 @@ cdef class ParquetReader(_Weakrefable):
return pyarrow_wrap_table(ctable)
def read_all(self, column_indices=None, bint use_threads=True):
+ """
+ Parameters
+ ----------
+ column_indices : list[int], optional
+ use_threads : bool, default True
+
+ Returns
+ -------
+ table : pyarrow.Table
+ """
cdef:
shared_ptr[CTable] ctable
vector[int] c_column_indices
@@ -1387,6 +1457,16 @@ cdef class ParquetReader(_Weakrefable):
return pyarrow_wrap_table(ctable)
def scan_contents(self, column_indices=None, batch_size=65536):
+ """
+ Parameters
+ ----------
+ column_indices : list[int], optional
+ batch_size : int32, default 65536
+
+ Returns
+ -------
+ num_rows : int64
+ """
cdef:
vector[int] c_column_indices
int32_t c_batch_size
@@ -1434,6 +1514,18 @@ cdef class ParquetReader(_Weakrefable):
return self._column_idx_map[tobytes(column_name)]
def read_column(self, int column_index):
+ """
+ Read the column at the specified index.
+
+ Parameters
+ ----------
+ column_index : int
+ Index of the column.
+
+ Returns
+ -------
+ column : pyarrow.ChunkedArray
+ """
cdef shared_ptr[CChunkedArray] out
with nogil:
check_status(self.reader.get()
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 2f8959cd72..ce4eafd8e3 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1265,6 +1265,17 @@ cdef class Array(_PandasConvertible):
return frombytes(result, safe=True)
def format(self, **kwargs):
+ """
+ DEPRECATED, use pyarrow.Array.to_string
+
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ str
+ """
import warnings
warnings.warn('Array.format is deprecated, use Array.to_string')
return self.to_string(**kwargs)
@@ -1281,6 +1292,15 @@ cdef class Array(_PandasConvertible):
return NotImplemented
def equals(Array self, Array other not None):
+ """
+ Parameters
+ ----------
+ other : pyarrow.Array
+
+ Returns
+ -------
+ bool
+ """
return self.ap.Equals(deref(other.ap))
def __len__(self):
@@ -2336,6 +2356,19 @@ cdef class UnionArray(Array):
"""
def child(self, int pos):
+ """
+ DEPRECATED, use field() instead.
+
+ Parameters
+ ----------
+ pos : int
+ The physical index of the union child field (not its type code).
+
+ Returns
+ -------
+ field : pyarrow.Field
+ The given child field.
+ """
import warnings
warnings.warn("child is deprecated, use field", FutureWarning)
return self.field(pos)
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index cc46bc760f..35bbf5018f 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -191,6 +191,19 @@ cdef class Projector(_Weakrefable):
return self.projector.get().DumpIR().decode()
def evaluate(self, RecordBatch batch, SelectionVector selection=None):
+ """
+ Evaluate the specified record batch and return the projected arrays,
+ computed only at the selected positions when a selection vector is given.
+
+ Parameters
+ ----------
+ batch : pyarrow.RecordBatch
+ selection : pyarrow.gandiva.SelectionVector
+
+ Returns
+ -------
+ list[pyarrow.Array]
+ """
cdef vector[shared_ptr[CArray]] results
if selection is None:
check_status(self.projector.get().Evaluate(
@@ -227,6 +240,19 @@ cdef class Filter(_Weakrefable):
return self.filter.get().DumpIR().decode()
def evaluate(self, RecordBatch batch, MemoryPool pool, dtype='int32'):
+ """
+ Evaluate the specified record batch and return a selection vector.
+
+ Parameters
+ ----------
+ batch : pyarrow.RecordBatch
+ pool : MemoryPool
+ dtype : DataType or str, default int32
+
+ Returns
+ -------
+ pyarrow.gandiva.SelectionVector
+ """
cdef:
DataType type = ensure_type(dtype)
shared_ptr[CSelectionVector] selection
@@ -252,6 +278,18 @@ cdef class Filter(_Weakrefable):
cdef class TreeExprBuilder(_Weakrefable):
def make_literal(self, value, dtype):
+ """
+ Create a node for a literal value.
+
+ Parameters
+ ----------
+ value : a literal value
+ dtype : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef:
DataType type = ensure_type(dtype)
shared_ptr[CNode] r
@@ -289,6 +327,19 @@ cdef class TreeExprBuilder(_Weakrefable):
def make_expression(self, Node root_node not None,
Field return_field not None):
+ """
+ Create an expression with the specified root_node,
+ writing the result to return_field.
+
+ Parameters
+ ----------
+ root_node : pyarrow.gandiva.Node
+ return_field : pyarrow.Field
+
+ Returns
+ -------
+ pyarrow.gandiva.Expression
+ """
cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression(
root_node.node, return_field.sp_field)
cdef Expression expression = Expression()
@@ -296,6 +347,19 @@ cdef class TreeExprBuilder(_Weakrefable):
return expression
def make_function(self, name, children, DataType return_type):
+ """
+ Create a node with a function.
+
+ Parameters
+ ----------
+ name : str
+ children : list[pyarrow.gandiva.Node]
+ return_type : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
@@ -307,17 +371,53 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
def make_field(self, Field field not None):
+ """
+ Create a node with an Arrow field.
+
+ Parameters
+ ----------
+ field : pyarrow.Field
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
return Node.create(r)
def make_if(self, Node condition not None, Node this_node not None,
Node else_node not None, DataType return_type not None):
+ """
+ Create a node with an if-else expression.
+
+ Parameters
+ ----------
+ condition : pyarrow.gandiva.Node
+ this_node : pyarrow.gandiva.Node
+ else_node : pyarrow.gandiva.Node
+ return_type : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
condition.node, this_node.node, else_node.node,
return_type.sp_type)
return Node.create(r)
def make_and(self, children):
+ """
+ Create a Node with a boolean AND expression.
+
+ Parameters
+ ----------
+ children : list[pyarrow.gandiva.Node]
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
@@ -328,6 +428,17 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
def make_or(self, children):
+ """
+ Create a Node with a boolean OR expression.
+
+ Parameters
+ ----------
+ children : list[pyarrow.gandiva.Node]
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
@@ -420,6 +531,19 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
def make_in_expression(self, Node node not None, values, dtype):
+ """
+ Create a Node with an IN expression.
+
+ Parameters
+ ----------
+ node : pyarrow.gandiva.Node
+ values : iterable
+ dtype : DataType
+
+ Returns
+ -------
+ pyarrow.gandiva.Node
+ """
cdef DataType type = ensure_type(dtype)
if type.id == _Type_INT32:
@@ -444,6 +568,17 @@ cdef class TreeExprBuilder(_Weakrefable):
raise TypeError("Data type " + str(dtype) + " not supported.")
def make_condition(self, Node condition not None):
+ """
+ Create a condition with the specified node.
+
+ Parameters
+ ----------
+ condition : pyarrow.gandiva.Node
+
+ Returns
+ -------
+ pyarrow.gandiva.Condition
+ """
cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
condition.node)
return Condition.create(r)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 2a78f7e795..e3018ab470 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -575,6 +575,14 @@ cdef class NativeFile(_Weakrefable):
return line
def read_buffer(self, nbytes=None):
+ """
+ Read from the file and return the data as a Buffer.
+
+ Parameters
+ ----------
+ nbytes : int, optional
+ Maximum number of bytes to read.
+ """
cdef:
int64_t c_nbytes
int64_t bytes_read = 0
@@ -602,6 +610,14 @@ cdef class NativeFile(_Weakrefable):
raise UnsupportedOperation()
def writelines(self, lines):
+ """
+ Write lines to the file.
+
+ Parameters
+ ----------
+ lines : iterable
+ Iterable of bytes-like objects or exporters of the buffer protocol.
+ """
self._assert_writable()
for line in lines:
@@ -865,12 +881,35 @@ cdef class PythonFile(NativeFile):
self.is_writable = True
def truncate(self, pos=None):
+ """
+ Parameters
+ ----------
+ pos : int, optional
+ """
self.handle.truncate(pos)
def readline(self, size=None):
+ """
+ Read and return a line of bytes from the file.
+
+ If size is specified, read at most size bytes.
+
+ Parameters
+ ----------
+ size : int, optional
+ Maximum number of bytes to read.
+ """
return self.handle.readline(size)
def readlines(self, hint=None):
+ """
+ Read lines from the file.
+
+ Parameters
+ ----------
+ hint : int, optional
+ Maximum total number of bytes to read before stopping.
+ """
return self.handle.readlines(hint)
@@ -1146,16 +1185,31 @@ cdef class FixedSizeBufferWriter(NativeFile):
self.is_writable = True
def set_memcopy_threads(self, int num_threads):
+ """
+ Parameters
+ ----------
+ num_threads : int
+ """
cdef CFixedSizeBufferWriter* writer = \
<CFixedSizeBufferWriter*> self.output_stream.get()
writer.set_memcopy_threads(num_threads)
def set_memcopy_blocksize(self, int64_t blocksize):
+ """
+ Parameters
+ ----------
+ blocksize : int64
+ """
cdef CFixedSizeBufferWriter* writer = \
<CFixedSizeBufferWriter*> self.output_stream.get()
writer.set_memcopy_blocksize(blocksize)
def set_memcopy_threshold(self, int64_t threshold):
+ """
+ Parameters
+ ----------
+ threshold : int64
+ """
cdef CFixedSizeBufferWriter* writer = \
<CFixedSizeBufferWriter*> self.output_stream.get()
writer.set_memcopy_threshold(threshold)
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index aff1c311ab..e19807ba56 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -123,6 +123,15 @@ cdef class Scalar(_Weakrefable):
return str(self.as_py())
def equals(self, Scalar other not None):
+ """
+ Parameters
+ ----------
+ other : pyarrow.Scalar
+
+ Returns
+ -------
+ bool
+ """
return self.wrapped.get().Equals(other.unwrap().get()[0])
def __eq__(self, other):
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f08162089b..2eae38485d 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -160,6 +160,17 @@ cdef class ChunkedArray(_PandasConvertible):
return frombytes(result, safe=True)
def format(self, **kwargs):
+ """
+ DEPRECATED, use pyarrow.ChunkedArray.to_string
+
+ Parameters
+ ----------
+ **kwargs : dict
+
+ Returns
+ -------
+ str
+ """
import warnings
warnings.warn('ChunkedArray.format is deprecated, '
'use ChunkedArray.to_string')
diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi
index ba732879aa..1afce7f4a1 100644
--- a/python/pyarrow/tensor.pxi
+++ b/python/pyarrow/tensor.pxi
@@ -316,6 +316,17 @@ shape: {0.shape}""".format(self)
def from_dense_numpy(cls, obj, dim_names=None):
"""
Convert numpy.ndarray to arrow::SparseCOOTensor
+
+ Parameters
+ ----------
+ obj : numpy.ndarray
+ The dense numpy array that should be converted.
+ dim_names : list[str], optional
+ Names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCOOTensor
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -549,6 +560,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
@@ -601,6 +624,10 @@ shape: {0.shape}""".format(self)
The dense numpy array that should be converted.
dim_names : list, optional
The names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCSRMatrix
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -781,6 +808,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
@@ -816,6 +855,17 @@ shape: {0.shape}""".format(self)
def from_dense_numpy(cls, obj, dim_names=None):
"""
Convert numpy.ndarray to arrow::SparseCSCMatrix
+
+ Parameters
+ ----------
+ obj : numpy.ndarray
+ The dense numpy array that should be converted.
+ dim_names : list[str], optional
+ Names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCSCMatrix
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -997,6 +1047,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
@@ -1040,6 +1102,17 @@ shape: {0.shape}""".format(self)
def from_dense_numpy(cls, obj, dim_names=None):
"""
Convert numpy.ndarray to arrow::SparseCSFTensor
+
+ Parameters
+ ----------
+ obj : numpy.ndarray
+ The dense numpy array that should be converted.
+ dim_names : list[str], optional
+ Names of the dimensions.
+
+ Returns
+ -------
+ pyarrow.SparseCSFTensor
"""
return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
@@ -1190,6 +1263,18 @@ shape: {0.shape}""".format(self)
return self.stp.size()
def dim_name(self, i):
+ """
+ Returns the name of the i-th tensor dimension.
+
+ Parameters
+ ----------
+ i : int
+ The physical index of the tensor dimension.
+
+ Returns
+ -------
+ str
+ """
return frombytes(self.stp.dim_name(i))
@property
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 12ad2fc4b6..f2dd59a0f1 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -199,6 +199,15 @@ cdef class DataType(_Weakrefable):
self.pep3118_format = _datatype_to_pep3118(self.type)
cpdef Field field(self, i):
+ """
+ Parameters
+ ----------
+ i : int
+
+ Returns
+ -------
+ pyarrow.Field
+ """
if not isinstance(i, int):
raise TypeError(f"Expected int index, got type '{type(i)}'")
cdef int index = <int> _normalize_index(i, self.type.num_fields())
@@ -1886,6 +1895,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
return self.wrapped
def equals(self, KeyValueMetadata other):
+ """
+ Parameters
+ ----------
+ other : pyarrow.KeyValueMetadata
+
+ Returns
+ -------
+ bool
+ """
return self.metadata.Equals(deref(other.wrapped))
def __repr__(self):
@@ -1925,9 +1943,27 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
return KeyValueMetadata, (list(self.items()),)
def key(self, i):
+ """
+ Parameters
+ ----------
+ i : int
+
+ Returns
+ -------
+ bytes
+ """
return self.metadata.key(i)
def value(self, i):
+ """
+ Parameters
+ ----------
+ i : int
+
+ Returns
+ -------
+ bytes
+ """
return self.metadata.value(i)
def keys(self):
@@ -1943,6 +1979,15 @@ cdef class KeyValueMetadata(_Metadata, Mapping):
yield (self.metadata.key(i), self.metadata.value(i))
def get_all(self, key):
+ """
+ Parameters
+ ----------
+ key : str
+
+ Returns
+ -------
+ list[bytes]
+ """
key = tobytes(key)
return [v for k, v in self.items() if k == key]