(arrow) branch main updated: GH-38944: [Python] Fix spelling (#38945)

jorisvandenbossche Fri, 01 Dec 2023 00:31:36 -0800

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 6101d12676 GH-38944: [Python] Fix spelling (#38945)
6101d12676 is described below

commit 6101d12676f4cfac52822e3dc13034306a4bd83b
Author: Josh Soref <[email protected]>
AuthorDate: Fri Dec 1 03:30:13 2023 -0500

    GH-38944: [Python] Fix spelling (#38945)
    
    
    
    ### Rationale for this change
    
    ### What changes are included in this PR?
    
    Spelling fixes to python/
    
    ### Are these changes tested?
    
    ### Are there any user-facing changes?
    
    * Closes: #38944
    
    Authored-by: Josh Soref <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 python/CMakeLists.txt                              |  2 +-
 python/examples/minimal_build/build_conda.sh       |  2 +-
 python/examples/minimal_build/build_venv.sh        |  2 +-
 python/pyarrow/_acero.pyx                          |  4 ++--
 python/pyarrow/_compute.pyx                        |  8 +++----
 python/pyarrow/_cuda.pyx                           |  2 +-
 python/pyarrow/_dataset.pyx                        |  4 ++--
 python/pyarrow/_dataset_parquet.pyx                |  6 ++---
 python/pyarrow/_parquet.pyx                        |  6 ++---
 python/pyarrow/acero.py                            |  2 +-
 python/pyarrow/array.pxi                           |  4 ++--
 python/pyarrow/dataset.py                          |  8 +++----
 python/pyarrow/includes/libarrow_python.pxd        |  2 +-
 python/pyarrow/interchange/column.py               |  2 +-
 python/pyarrow/interchange/from_dataframe.py       | 14 +++++------
 python/pyarrow/io.pxi                              |  8 +++----
 python/pyarrow/parquet/core.py                     | 10 ++++----
 python/pyarrow/src/arrow/python/datetime.h         |  2 +-
 python/pyarrow/src/arrow/python/inference.cc       |  2 +-
 python/pyarrow/src/arrow/python/python_to_arrow.cc |  6 ++---
 python/pyarrow/src/arrow/python/udf.cc             |  4 ++--
 python/pyarrow/table.pxi                           | 28 +++++++++++-----------
 python/pyarrow/tests/parquet/test_basic.py         |  2 +-
 python/pyarrow/tests/parquet/test_dataset.py       |  4 ++--
 python/pyarrow/tests/test_acero.py                 |  2 +-
 python/pyarrow/tests/test_array.py                 |  2 +-
 python/pyarrow/tests/test_compute.py               |  4 ++--
 python/pyarrow/tests/test_dataset.py               |  4 ++--
 python/pyarrow/tests/test_dataset_encryption.py    |  2 +-
 python/pyarrow/tests/test_fs.py                    | 14 +++++------
 python/pyarrow/tests/test_io.py                    |  2 +-
 python/pyarrow/tests/test_json.py                  |  2 +-
 python/pyarrow/tests/test_misc.py                  |  2 +-
 python/pyarrow/tests/test_pandas.py                |  2 +-
 python/pyarrow/tests/test_scalars.py               |  2 +-
 python/pyarrow/tests/test_substrait.py             |  2 +-
 python/pyarrow/tests/test_types.py                 |  2 +-
 python/pyarrow/tests/test_udf.py                   |  8 +++----
 python/pyarrow/util.py                             |  2 +-
 39 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 529265235c..3f810d2727 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -1,5 +1,5 @@
 # Licensed to the Apache Software Foundation (ASF) under one
-# or more cod ntributor license agreements.  See the NOTICE file
+# or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh
index cd0030ac5f..72c3a5f9ea 100755
--- a/python/examples/minimal_build/build_conda.sh
+++ b/python/examples/minimal_build/build_conda.sh
@@ -91,7 +91,7 @@ popd
 # Build and test Python library
 pushd $ARROW_ROOT/python
 
-rm -rf build/  # remove any pesky pre-existing build directory
+rm -rf build/  # remove any pesky preexisting build directory
 
 export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}
 export PYARROW_BUILD_TYPE=Debug
diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh
index d0432049f7..3bd641d0e7 100755
--- a/python/examples/minimal_build/build_venv.sh
+++ b/python/examples/minimal_build/build_venv.sh
@@ -62,7 +62,7 @@ popd
 # Build and test Python library
 pushd $ARROW_ROOT/python
 
-rm -rf build/  # remove any pesky pre-existing build directory
+rm -rf build/  # remove any pesky preexisting build directory
 
 export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}
 export PYARROW_BUILD_TYPE=Debug
diff --git a/python/pyarrow/_acero.pyx b/python/pyarrow/_acero.pyx
index bb3196c86e..1c9b2f75c3 100644
--- a/python/pyarrow/_acero.pyx
+++ b/python/pyarrow/_acero.pyx
@@ -155,7 +155,7 @@ class ProjectNodeOptions(_ProjectNodeOptions):
         List of expressions to evaluate against the source batch. This must
         be scalar expressions.
     names : list of str, optional
-        List of names for each of the ouptut columns (same length as
+        List of names for each of the output columns (same length as
         `expressions`). If `names` is not provided, the string
         representations of exprs will be used.
     """
@@ -213,7 +213,7 @@ class AggregateNodeOptions(_AggregateNodeOptions):
     Parameters
     ----------
     aggregates : list of tuples
-        Aggregations which will be applied to the targetted fields.
+        Aggregations which will be applied to the targeted fields.
         Specified as a list of tuples, where each tuple is one aggregation
         specification and consists of: aggregation target column(s) followed
         by function name, aggregation function options object and the
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 51dfdbf8eb..a267d53599 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -1390,7 +1390,7 @@ class TakeOptions(_TakeOptions):
     ----------
     boundscheck : boolean, default True
         Whether to check indices are within bounds. If False and an
-        index is out of boundes, behavior is undefined (the process
+        index is out of bounds, behavior is undefined (the process
         may crash).
     """
 
@@ -1468,7 +1468,7 @@ cdef class _StructFieldOptions(FunctionOptions):
     def _set_options(self, indices):
 
         if isinstance(indices, (list, tuple)) and not len(indices):
-            # Allow empty indices; effecitively return same array
+            # Allow empty indices; effectively return same array
             self.wrapped.reset(
                 new CStructFieldOptions(<vector[int]>indices))
             return
@@ -2991,7 +2991,7 @@ def register_aggregate_function(func, function_name, 
function_doc, in_types, out
     This is often used with ordered or segmented aggregation where groups
     can be emit before accumulating all of the input data.
 
-    Note that currently the size of any input column can not exceed 2 GB
+    Note that currently the size of any input column cannot exceed 2 GB
     for a single segment (all groups combined).
 
     Parameters
@@ -3076,7 +3076,7 @@ def register_tabular_function(func, function_name, 
function_doc, in_types, out_t
     UdfContext and returning a generator of struct arrays.
     The in_types argument must be empty and the out_type argument
     specifies a schema. Each struct array must have field types
-    correspoding to the schema.
+    corresponding to the schema.
 
     Parameters
     ----------
diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx
index dc7f42c10b..ba799a105e 100644
--- a/python/pyarrow/_cuda.pyx
+++ b/python/pyarrow/_cuda.pyx
@@ -493,7 +493,7 @@ cdef class CudaBuffer(Buffer):
                     raise ValueError(
                         'requested more to copy than available from '
                         'device buffer')
-                # copy nbytes starting from position to new host buffeer
+                # copy nbytes starting from position to new host buffer
                 c_nbytes = nbytes
             buf = allocate_buffer(c_nbytes, memory_pool=memory_pool,
                                   resizable=resizable)
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index d7d69965d0..029948a609 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -853,7 +853,7 @@ cdef class Dataset(_Weakrefable):
             Which suffix to add to right column names. This prevents confusion
             when the columns in left and right datasets have colliding names.
         right_suffix : str, default None
-            Which suffic to add to the left column names. This prevents confusion
+            Which suffix to add to the left column names. This prevents confusion
             when the columns in left and right datasets have colliding names.
         coalesce_keys : bool, default True
             If the duplicated keys should be omitted from one of the sides
@@ -1016,7 +1016,7 @@ cdef class FileSystemDataset(Dataset):
         elif not isinstance(root_partition, Expression):
             raise TypeError(
                 "Argument 'root_partition' has incorrect type (expected "
-                "Epression, got {0})".format(type(root_partition))
+                "Expression, got {0})".format(type(root_partition))
             )
 
         for fragment in fragments:
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index bd4151624d..f83b78d933 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -415,7 +415,7 @@ cdef class ParquetFileFragment(FileFragment):
             the Parquet RowGroup statistics).
         schema : Schema, default None
             Schema to use when filtering row groups. Defaults to the
-            Fragment's phsyical schema
+            Fragment's physical schema
 
         Returns
         -------
@@ -450,7 +450,7 @@ cdef class ParquetFileFragment(FileFragment):
             the Parquet RowGroup statistics).
         schema : Schema, default None
             Schema to use when filtering row groups. Defaults to the
-            Fragment's phsyical schema
+            Fragment's physical schema
         row_group_ids : list of ints
             The row group IDs to include in the subset. Can only be specified
             if `filter` is None.
@@ -688,7 +688,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
     pre_buffer : bool, default True
         If enabled, pre-buffer the raw Parquet data instead of issuing one
         read per column chunk. This can improve performance on high-latency
-        filesystems (e.g. S3, GCS) by coalesing and issuing file reads in
+        filesystems (e.g. S3, GCS) by coalescing and issuing file reads in
         parallel using a background I/O thread pool.
         Set to False if you want to prioritize minimal memory usage
         over maximum speed.
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 737ba9d0a8..35344eb735 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -487,7 +487,7 @@ cdef class ColumnChunkMetaData(_Weakrefable):
 
     @property
     def total_compressed_size(self):
-        """Compresssed size in bytes (int)."""
+        """Compressed size in bytes (int)."""
         return self.metadata.total_compressed_size()
 
     @property
@@ -1655,7 +1655,7 @@ cdef shared_ptr[WriterProperties] 
_create_writer_properties(
         if use_byte_stream_split:
             if column_encoding is not None:
                 raise ValueError(
-                    "'use_byte_stream_split' can not be passed"
+                    "'use_byte_stream_split' cannot be passed"
                     "together with 'column_encoding'")
             else:
                 props.encoding(ParquetEncoding_BYTE_STREAM_SPLIT)
@@ -1667,7 +1667,7 @@ cdef shared_ptr[WriterProperties] 
_create_writer_properties(
                 column_encoding[column] = 'BYTE_STREAM_SPLIT'
             else:
                 raise ValueError(
-                    "'use_byte_stream_split' can not be passed"
+                    "'use_byte_stream_split' cannot be passed"
                     "together with 'column_encoding'")
 
     # column_encoding
diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py
index 0609e45753..a5583c9e65 100644
--- a/python/pyarrow/acero.py
+++ b/python/pyarrow/acero.py
@@ -221,7 +221,7 @@ def _perform_join(join_type, left_operand, left_keys,
                 # Do not include right table keys. As they would lead to 
duplicated keys
                 continue
             else:
-                # For all the other columns incude them as they are.
+                # For all the other columns include them as they are.
                 # Just recompute the suffixes that the join produced as the 
projection
                 # would lose them otherwise.
                 if (
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 2e97503822..9d62bed51f 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -254,7 +254,7 @@ def array(object obj, type=None, mask=None, size=None, 
from_pandas=None,
         schema_capsule, array_capsule = obj.__arrow_c_array__(requested_type)
         out_array = Array._import_from_c_capsule(schema_capsule, array_capsule)
         if type is not None and out_array.type != type:
-            # PyCapsule interface type coersion is best effort, so we need to
+            # PyCapsule interface type coercion is best effort, so we need to
             # check the type of the returned array and cast if necessary
             out_array = array.cast(type, safe=safe, memory_pool=memory_pool)
         return out_array
@@ -3415,7 +3415,7 @@ cdef class RunEndEncodedArray(Array):
         Find the physical offset of this REE array.
 
         This is the offset of the run that contains the value of the first
-        logical element of this array considering its offet.
+        logical element of this array considering its offset.
 
         This function uses binary-search, so it has a O(log N) cost.
         """
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index adf21814a2..9301a5fee5 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -169,7 +169,7 @@ def partitioning(schema=None, field_names=None, flavor=None,
     Returns
     -------
     Partitioning or PartitioningFactory
-        The partioning scheme
+        The partitioning scheme
 
     Examples
     --------
@@ -511,7 +511,7 @@ def parquet_dataset(metadata_path, schema=None, 
filesystem=None, format=None,
                     partitioning=None, partition_base_dir=None):
     """
     Create a FileSystemDataset from a `_metadata` file created via
-    `pyarrrow.parquet.write_metadata`.
+    `pyarrow.parquet.write_metadata`.
 
     Parameters
     ----------
@@ -534,7 +534,7 @@ def parquet_dataset(metadata_path, schema=None, 
filesystem=None, format=None,
     partitioning : Partitioning, PartitioningFactory, str, list of str
         The partitioning scheme specified with the ``partitioning()``
         function. A flavor string can be used as shortcut, and with a list of
-        field names a DirectionaryPartitioning will be inferred.
+        field names a DirectoryPartitioning will be inferred.
     partition_base_dir : str, optional
         For the purposes of applying the partitioning, paths will be
         stripped of the partition_base_dir. Files not matching the
@@ -630,7 +630,7 @@ RecordBatch or Table, iterable of RecordBatch, 
RecordBatchReader, or URI
     partitioning : Partitioning, PartitioningFactory, str, list of str
         The partitioning scheme specified with the ``partitioning()``
         function. A flavor string can be used as shortcut, and with a list of
-        field names a DirectionaryPartitioning will be inferred.
+        field names a DirectoryPartitioning will be inferred.
     partition_base_dir : str, optional
         For the purposes of applying the partitioning, paths will be
         stripped of the partition_base_dir. Files not matching the
diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd
index b8a3041796..e3179062a1 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -263,7 +263,7 @@ cdef extern from "arrow/python/common.h" namespace 
"arrow::py":
 
 cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
     cdef cppclass SharedPtrNoGIL[T](shared_ptr[T]):
-        # This looks like the only way to satsify both Cython 2 and Cython 3
+        # This looks like the only way to satisfy both Cython 2 and Cython 3
         SharedPtrNoGIL& operator=(...)
     cdef cppclass UniquePtrNoGIL[T, DELETER=*](unique_ptr[T, DELETER]):
         UniquePtrNoGIL& operator=(...)
diff --git a/python/pyarrow/interchange/column.py b/python/pyarrow/interchange/column.py
index eaf7834d5b..e609e469b0 100644
--- a/python/pyarrow/interchange/column.py
+++ b/python/pyarrow/interchange/column.py
@@ -372,7 +372,7 @@ class _PyArrowColumn:
         """
         # In case of no missing values, we need to set ColumnNullType to
         # non nullable as in the current __dataframe__ protocol bit/byte masks
-        # can not be None
+        # cannot be None
         if self.null_count == 0:
             return ColumnNullType.NON_NULLABLE, None
         else:
diff --git a/python/pyarrow/interchange/from_dataframe.py b/python/pyarrow/interchange/from_dataframe.py
index 3767b18f2a..fcaec41e3d 100644
--- a/python/pyarrow/interchange/from_dataframe.py
+++ b/python/pyarrow/interchange/from_dataframe.py
@@ -86,20 +86,20 @@ def from_dataframe(df: DataFrameObject, allow_copy=True) -> 
pa.Table:
 
     >>> import pandas as pd
     >>> df = pd.DataFrame({
-    ...         "n_atendees": [100, 10, 1],
+    ...         "n_attendees": [100, 10, 1],
     ...         "country": ["Italy", "Spain", "Slovenia"],
     ...     })
     >>> df
-       n_atendees   country
-    0         100     Italy
-    1          10     Spain
-    2           1  Slovenia
+       n_attendees   country
+    0          100     Italy
+    1           10     Spain
+    2            1  Slovenia
     >>> from_dataframe(df)
     pyarrow.Table
-    n_atendees: int64
+    n_attendees: int64
     country: large_string
     ----
-    n_atendees: [[100,10,1]]
+    n_attendees: [[100,10,1]]
     country: [["Italy","Spain","Slovenia"]]
     """
     if isinstance(df, pa.Table):
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 24b4e003a2..3086845efa 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1438,7 +1438,7 @@ cdef class Buffer(_Weakrefable):
 
     def __getreadbuffer__(self, Py_ssize_t idx, void **p):
         if idx != 0:
-            raise SystemError("accessing non-existent buffer segment")
+            raise SystemError("accessing nonexistent buffer segment")
         if p != NULL:
             p[0] = <void*> self.buffer.get().data()
         return self.size
@@ -1447,7 +1447,7 @@ cdef class Buffer(_Weakrefable):
         if not self.buffer.get().is_mutable():
             raise SystemError("trying to write an immutable buffer")
         if idx != 0:
-            raise SystemError("accessing non-existent buffer segment")
+            raise SystemError("accessing nonexistent buffer segment")
         if p != NULL:
             p[0] = <void*> self.buffer.get().data()
         return self.size
@@ -1629,7 +1629,7 @@ cdef class CompressedInputStream(NativeFile):
 
     Examples
     --------
-    Create an ouput stream wich compresses the data:
+    Create an output stream wich compresses the data:
 
     >>> import pyarrow as pa
     >>> data = b"Compressed stream"
@@ -1686,7 +1686,7 @@ cdef class CompressedOutputStream(NativeFile):
 
     Examples
     --------
-    Create an ouput stream wich compresses the data:
+    Create an output stream wich compresses the data:
 
     >>> import pyarrow as pa
     >>> data = b"Compressed stream"
diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 096e960384..db22eb3293 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -828,7 +828,7 @@ use_byte_stream_split : bool or list, default False
     and should be combined with a compression codec.
 column_encoding : string or dict, default None
     Specify the encoding scheme on a per column basis.
-    Can only be used when when ``use_dictionary`` is set to False, and
+    Can only be used when ``use_dictionary`` is set to False, and
     cannot be used in combination with ``use_byte_stream_split``.
     Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT',
     'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}.
@@ -1906,7 +1906,7 @@ Examples
             warnings.warn(
                 "Specifying the 'schema' argument with 'use_legacy_dataset="
                 "True' is deprecated as of pyarrow 8.0.0. You can still "
-                "specify it in combination with 'use_legacy_dataet=False', "
+                "specify it in combination with 'use_legacy_dataset=False', "
                 "but in that case you need to specify a pyarrow.Schema "
                 "instead of a ParquetSchema.",
                 FutureWarning, stacklevel=2)
@@ -3272,13 +3272,13 @@ def write_to_dataset(table, root_path, 
partition_cols=None,
         passed, the filename will consist of a uuid.
         This option is only supported for use_legacy_dataset=True.
         When use_legacy_dataset=None and this option is specified,
-        use_legacy_datase will be set to True.
+        use_legacy_dataset will be set to True.
     filesystem : FileSystem, default None
         If nothing passed, will be inferred based on path.
         Path will try to be found in the local on-disk filesystem otherwise
         it will be parsed as an URI to determine the filesystem.
     use_legacy_dataset : bool
-        Default is False. Set to True to use the the legacy behaviour
+        Default is False. Set to True to use the legacy behaviour
         (this option is deprecated, and the legacy implementation will be
         removed in a future version). The legacy implementation still
         supports the `partition_filename_cb` keyword but is less efficient
@@ -3386,7 +3386,7 @@ def write_to_dataset(table, root_path, 
partition_cols=None,
         else:
             use_legacy_dataset = False
 
-    # Check for conflicting kewords
+    # Check for conflicting keywords
     msg_confl_0 = (
         "The '{0}' argument is not supported by use_legacy_dataset={2}. "
         "Use only '{1}' instead."
diff --git a/python/pyarrow/src/arrow/python/datetime.h b/python/pyarrow/src/arrow/python/datetime.h
index 327a61f3de..7346d6bc67 100644
--- a/python/pyarrow/src/arrow/python/datetime.h
+++ b/python/pyarrow/src/arrow/python/datetime.h
@@ -220,7 +220,7 @@ ARROW_PYTHON_EXPORT
 Result<PyObject*> MonthDayNanoIntervalArrayToPyList(
     const MonthDayNanoIntervalArray& array);
 
-/// \brief Convert the Scalar obect to a pyarrow.MonthDayNano (or None if
+/// \brief Convert the Scalar object to a pyarrow.MonthDayNano (or None if
 /// is isn't valid).
 ARROW_PYTHON_EXPORT
 Result<PyObject*> MonthDayNanoIntervalScalarToPyObject(
diff --git a/python/pyarrow/src/arrow/python/inference.cc b/python/pyarrow/src/arrow/python/inference.cc
index 3407b32720..9537aec574 100644
--- a/python/pyarrow/src/arrow/python/inference.cc
+++ b/python/pyarrow/src/arrow/python/inference.cc
@@ -623,7 +623,7 @@ class TypeInferrer {
 
     // XXX(wesm): In ARROW-4324 I added accounting to check whether
     // all of the non-null values have NumPy dtypes, but the
-    // total_count not not being properly incremented here
+    // total_count not being properly incremented here
     ++(*list_inferrer_).total_count_;
     return list_inferrer_->VisitDType(dtype, keep_going);
   }
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index e474924998..23b92598e3 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -121,7 +121,7 @@ const MonthDayNanoAttrData 
MonthDayNanoTraits<MonthDayNanoField::kNanoseconds>::
      {"minutes", /*minutes_in_hours=*/60},
      {"seconds", /*seconds_in_minute=*/60},
      {"milliseconds", /*milliseconds_in_seconds*/ 1000},
-     {"microseconds", /*microseconds_in_millseconds=*/1000},
+     {"microseconds", /*microseconds_in_milliseconds=*/1000},
      {"nanoseconds", /*nanoseconds_in_microseconds=*/1000},
      {nullptr, 0}};
 
@@ -481,7 +481,7 @@ class PyValue {
 
   // The binary-like intermediate representation is PyBytesView because it 
keeps temporary
   // python objects alive (non-contiguous memoryview) and stores whether the 
original
-  // object was unicode encoded or not, which is used for unicode -> bytes 
coersion if
+  // object was unicode encoded or not, which is used for unicode -> bytes 
coercion if
   // there is a non-unicode object observed.
 
   static Status Convert(const BaseBinaryType*, const O&, I obj, PyBytesView& 
view) {
@@ -819,7 +819,7 @@ class PyListConverter : public ListConverter<T, 
PyConverter, PyConverterTrait> {
  protected:
   Status ValidateBuilder(const MapType*) {
     if (this->list_builder_->key_builder()->null_count() > 0) {
-      return Status::Invalid("Invalid Map: key field can not contain null 
values");
+      return Status::Invalid("Invalid Map: key field cannot contain null 
values");
     } else {
       return Status::OK();
     }
diff --git a/python/pyarrow/src/arrow/python/udf.cc b/python/pyarrow/src/arrow/python/udf.cc
index f7761a9277..e9b72a2592 100644
--- a/python/pyarrow/src/arrow/python/udf.cc
+++ b/python/pyarrow/src/arrow/python/udf.cc
@@ -275,7 +275,7 @@ struct PythonUdfHashAggregatorImpl : public 
HashUdfAggregator {
     }
   }
 
-  // same as ApplyGrouping in parition.cc
+  // same as ApplyGrouping in partition.cc
   // replicated the code here to avoid complicating the dependencies
   static Result<RecordBatchVector> ApplyGroupings(
       const ListArray& groupings, const std::shared_ptr<RecordBatch>& batch) {
@@ -600,7 +600,7 @@ Status RegisterScalarAggregateFunction(PyObject* function, 
UdfWrapperCallback cb
 /// \param options User provided udf options
 UdfOptions AdjustForHashAggregate(const UdfOptions& options) {
   UdfOptions hash_options;
-  // Append hash_ before the function name to seperate from the scalar
+  // Append hash_ before the function name to separate from the scalar
   // version
   hash_options.func_name = "hash_" + options.func_name;
   // Extend input types with group id. Group id is appended by the group
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 0fa913a219..bbed789553 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -449,7 +449,7 @@ cdef class ChunkedArray(_PandasConvertible):
         >>> import pyarrow as pa
         >>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
         >>> animals = pa.chunked_array((
-        ...             ["Flamingo", "Parot", "Dog"],
+        ...             ["Flamingo", "Parrot", "Dog"],
         ...             ["Horse", "Brittle stars", "Centipede"]
         ...             ))
         >>> n_legs.equals(n_legs)
@@ -584,7 +584,7 @@ cdef class ChunkedArray(_PandasConvertible):
         --------
         >>> import pyarrow as pa
         >>> animals = pa.chunked_array((
-        ...             ["Flamingo", "Parot", "Dog"],
+        ...             ["Flamingo", "Parrot", "Dog"],
         ...             ["Horse", "Brittle stars", "Centipede"]
         ...             ))
         >>> animals.dictionary_encode()
@@ -594,7 +594,7 @@ cdef class ChunkedArray(_PandasConvertible):
           -- dictionary:
             [
               "Flamingo",
-              "Parot",
+              "Parrot",
               "Dog",
               "Horse",
               "Brittle stars",
@@ -610,7 +610,7 @@ cdef class ChunkedArray(_PandasConvertible):
           -- dictionary:
             [
               "Flamingo",
-              "Parot",
+              "Parrot",
               "Dog",
               "Horse",
               "Brittle stars",
@@ -1127,7 +1127,7 @@ cdef class ChunkedArray(_PandasConvertible):
         Examples
         --------
         >>> import pyarrow as pa
-        >>> arr_1 = pa.array(["Flamingo", "Parot", "Dog"]).dictionary_encode()
+        >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode()
         >>> arr_2 = pa.array(["Horse", "Brittle stars", 
"Centipede"]).dictionary_encode()
         >>> c_arr = pa.chunked_array([arr_1, arr_2])
         >>> c_arr
@@ -1137,7 +1137,7 @@ cdef class ChunkedArray(_PandasConvertible):
           -- dictionary:
             [
               "Flamingo",
-              "Parot",
+              "Parrot",
               "Dog"
             ]
           -- indices:
@@ -1167,7 +1167,7 @@ cdef class ChunkedArray(_PandasConvertible):
           -- dictionary:
             [
               "Flamingo",
-              "Parot",
+              "Parrot",
               "Dog",
               "Horse",
               "Brittle stars",
@@ -1183,7 +1183,7 @@ cdef class ChunkedArray(_PandasConvertible):
           -- dictionary:
             [
               "Flamingo",
-              "Parot",
+              "Parrot",
               "Dog",
               "Horse",
               "Brittle stars",
@@ -2804,7 +2804,7 @@ cdef class RecordBatch(_Tabular):
         >>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle 
stars", "Centipede"])
         >>> names = ["n_legs", "animals"]
 
-        Construct a RecordBartch from pyarrow Arrays using names:
+        Construct a RecordBatch from pyarrow Arrays using names:
 
         >>> pa.RecordBatch.from_arrays([n_legs, animals], names=names)
         pyarrow.RecordBatch
@@ -2822,7 +2822,7 @@ cdef class RecordBatch(_Tabular):
         4       5  Brittle stars
         5     100      Centipede
 
-        Construct a RecordBartch from pyarrow Arrays using schema:
+        Construct a RecordBatch from pyarrow Arrays using schema:
 
         >>> my_schema = pa.schema([
         ...     pa.field('n_legs', pa.int64()),
@@ -3659,7 +3659,7 @@ cdef class Table(_Tabular):
         Examples
         --------
         >>> import pyarrow as pa
-        >>> arr_1 = pa.array(["Flamingo", "Parot", "Dog"]).dictionary_encode()
+        >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode()
         >>> arr_2 = pa.array(["Horse", "Brittle stars", 
"Centipede"]).dictionary_encode()
         >>> c_arr = pa.chunked_array([arr_1, arr_2])
         >>> table = pa.table([c_arr], names=["animals"])
@@ -3668,7 +3668,7 @@ cdef class Table(_Tabular):
         animals: dictionary<values=string, indices=int32, ordered=0>
         ----
         animals: [  -- dictionary:
-        ["Flamingo","Parot","Dog"]  -- indices:
+        ["Flamingo","Parrot","Dog"]  -- indices:
         [0,1,2],  -- dictionary:
         ["Horse","Brittle stars","Centipede"]  -- indices:
         [0,1,2]]
@@ -3680,9 +3680,9 @@ cdef class Table(_Tabular):
         animals: dictionary<values=string, indices=int32, ordered=0>
         ----
         animals: [  -- dictionary:
-        ["Flamingo","Parot","Dog","Horse","Brittle stars","Centipede"]  -- 
indices:
+        ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]  -- 
indices:
         [0,1,2],  -- dictionary:
-        ["Flamingo","Parot","Dog","Horse","Brittle stars","Centipede"]  -- 
indices:
+        ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"]  -- 
indices:
         [3,4,5]]
         """
         cdef:
diff --git a/python/pyarrow/tests/parquet/test_basic.py 
b/python/pyarrow/tests/parquet/test_basic.py
index 26c52b1cc5..83e6ebeb7a 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -661,7 +661,7 @@ def test_write_error_deletes_incomplete_file(tempdir):
 
 @parametrize_legacy_dataset
 def test_read_non_existent_file(tempdir, use_legacy_dataset):
-    path = 'non-existent-file.parquet'
+    path = 'nonexistent-file.parquet'
     try:
         pq.read_table(path, use_legacy_dataset=use_legacy_dataset)
     except Exception as e:
diff --git a/python/pyarrow/tests/parquet/test_dataset.py 
b/python/pyarrow/tests/parquet/test_dataset.py
index be27c71b81..a9e99d5d65 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1622,7 +1622,7 @@ def test_read_table_schema(tempdir):
     expected = pa.table({'a': [1, 2, 3]}, schema=schema)
     assert result.equals(expected)
 
-    # reading multiple fiels
+    # reading multiple files
     result = pq.read_table(tempdir, schema=schema)
     expected = pa.table({'a': [1, 2, 3, 1, 2, 3]}, schema=schema)
     assert result.equals(expected)
@@ -1796,7 +1796,7 @@ def 
test_parquet_write_to_dataset_deprecated_properties(tempdir):
 
 
 @pytest.mark.dataset
-def test_parquet_write_to_dataset_unsupported_keywards_in_legacy(tempdir):
+def test_parquet_write_to_dataset_unsupported_keywords_in_legacy(tempdir):
     table = pa.table({'a': [1, 2, 3]})
     path = tempdir / 'data.parquet'
 
diff --git a/python/pyarrow/tests/test_acero.py 
b/python/pyarrow/tests/test_acero.py
index 988e9b6e31..a436060130 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -265,7 +265,7 @@ def test_order_by():
     expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
     assert result.equals(expected)
 
-    # emtpy ordering
+    # empty ordering
     ord_opts = OrderByNodeOptions([])
     decl = Declaration.from_sequence([table_source, Declaration("order_by", 
ord_opts)])
     with pytest.raises(
diff --git a/python/pyarrow/tests/test_array.py 
b/python/pyarrow/tests/test_array.py
index 2f9727922b..599d15d023 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -989,7 +989,7 @@ def test_list_array_types_from_arrays_fail(list_array_type, 
list_type_factory):
     reconstructed_arr = list_array_type.from_arrays(arr.offsets, arr.values)
     assert reconstructed_arr.to_pylist() == [[0], [], [0, None], [0]]
 
-    # Manually specifiying offsets (with nulls) is same as mask at top level
+    # Manually specifying offsets (with nulls) is same as mask at top level
     reconstructed_arr = list_array_type.from_arrays(offsets, arr.values)
     assert arr == reconstructed_arr
     reconstructed_arr = list_array_type.from_arrays(arr.offsets,
diff --git a/python/pyarrow/tests/test_compute.py 
b/python/pyarrow/tests/test_compute.py
index 4b2144d702..067d96a821 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -2385,7 +2385,7 @@ def _check_temporal_rounding(ts, values, unit):
 
         # Check rounding with calendar_based_origin=True.
         # Note: rounding to month is not supported in Pandas so we can't
-        # approximate this functionallity and exclude unit == "day".
+        # approximate this functionality and exclude unit == "day".
         if unit != "day":
             options = pc.RoundTemporalOptions(
                 value, unit, calendar_based_origin=True)
@@ -3501,7 +3501,7 @@ def test_expression_call_function():
     assert str(pc.add(field, 1)) == "add(field, 1)"
     assert str(pc.add(field, pa.scalar(1))) == "add(field, 1)"
 
-    # Invalid pc.scalar input gives original erorr message
+    # Invalid pc.scalar input gives original error message
     msg = "only other expressions allowed as arguments"
     with pytest.raises(TypeError, match=msg):
         pc.add(field, object)
diff --git a/python/pyarrow/tests/test_dataset.py 
b/python/pyarrow/tests/test_dataset.py
index c6967326b3..d5e7015a5d 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2249,7 +2249,7 @@ def test_construct_from_list_of_files(tempdir, 
dataset_reader):
 
 @pytest.mark.parquet
 def test_construct_from_list_of_mixed_paths_fails(mockfs):
-    # isntantiate from a list of mixed paths
+    # instantiate from a list of mixed paths
     files = [
         'subdir/1/xxx/file0.parquet',
         'subdir/1/xxx/doesnt-exist.parquet',
@@ -2260,7 +2260,7 @@ def test_construct_from_list_of_mixed_paths_fails(mockfs):
 
 @pytest.mark.parquet
 def test_construct_from_mixed_child_datasets(mockfs):
-    # isntantiate from a list of mixed paths
+    # instantiate from a list of mixed paths
     a = ds.dataset(['subdir/1/xxx/file0.parquet',
                     'subdir/2/yyy/file1.parquet'], filesystem=mockfs)
     b = ds.dataset('subdir', filesystem=mockfs)
diff --git a/python/pyarrow/tests/test_dataset_encryption.py 
b/python/pyarrow/tests/test_dataset_encryption.py
index b5d6f510db..d25b22990a 100644
--- a/python/pyarrow/tests/test_dataset_encryption.py
+++ b/python/pyarrow/tests/test_dataset_encryption.py
@@ -123,7 +123,7 @@ def test_dataset_encryption_decryption():
         filesystem=mockfs,
     )
 
-    # read without descryption config -> should error is dataset was properly 
encrypted
+    # read without decryption config -> should error if the dataset was properly 
encrypted
     pformat = pa.dataset.ParquetFileFormat()
     with pytest.raises(IOError, match=r"no decryption"):
         ds.dataset("sample_dataset", format=pformat, filesystem=mockfs)
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index c540bf9681..1002e13471 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1303,12 +1303,12 @@ def test_s3_proxy_options(monkeypatch, pickle_module):
     # Missing port
     with pytest.raises(KeyError):
         S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})
-    # Invalid proxy URI (invalid scheme htttps)
+    # Invalid proxy URI (invalid scheme httpsB)
     with pytest.raises(pa.ArrowInvalid):
-        S3FileSystem(proxy_options='htttps://localhost:9000')
-    # Invalid proxy_options dict (invalid scheme htttps)
+        S3FileSystem(proxy_options='httpsB://localhost:9000')
+    # Invalid proxy_options dict (invalid scheme httpA)
     with pytest.raises(pa.ArrowInvalid):
-        S3FileSystem(proxy_options={'scheme': 'htttp', 'host': 'localhost',
+        S3FileSystem(proxy_options={'scheme': 'httpA', 'host': 'localhost',
                                     'port': 8999})
 
 
@@ -1690,11 +1690,11 @@ def test_s3_real_aws_region_selection():
     assert fs.region == 'us-east-2'
     # Reading from the wrong region may still work for public buckets...
 
-    # Non-existent bucket (hopefully, otherwise need to fix this test)
+    # Nonexistent bucket (hopefully, otherwise need to fix this test)
     with pytest.raises(IOError, match="Bucket '.*' not found"):
-        FileSystem.from_uri('s3://x-arrow-non-existent-bucket')
+        FileSystem.from_uri('s3://x-arrow-nonexistent-bucket')
     fs, path = FileSystem.from_uri(
-        's3://x-arrow-non-existent-bucket?region=us-east-3')
+        's3://x-arrow-nonexistent-bucket?region=us-east-3')
     assert fs.region == 'us-east-3'
 
 
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 9609e4066a..071962af29 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -229,7 +229,7 @@ def test_python_file_read_buffer():
         buf = f.read_buffer(length)
         assert len(buf) == length
         assert memoryview(buf).tobytes() == dst_buf[:length]
-        # buf should point to the same memory, so modyfing it
+        # buf should point to the same memory, so modifying it
         memoryview(buf)[0] = ord(b'x')
         # should modify the original
         assert dst_buf[0] == ord(b'x')
diff --git a/python/pyarrow/tests/test_json.py 
b/python/pyarrow/tests/test_json.py
index b8c1e874fc..a0a6174266 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -226,7 +226,7 @@ class BaseTestJSONRead:
         assert table.num_columns == 0
         assert table.num_rows == 2
 
-    def test_reconcile_accross_blocks(self):
+    def test_reconcile_across_blocks(self):
         # ARROW-12065: reconciling inferred types across blocks
         first_row = b'{                               }\n'
         read_options = ReadOptions(block_size=len(first_row))
diff --git a/python/pyarrow/tests/test_misc.py 
b/python/pyarrow/tests/test_misc.py
index a48ac0c3cd..8b8c50882b 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -57,7 +57,7 @@ def test_io_thread_count():
 
 
 def test_env_var_io_thread_count():
-    # Test that the number of IO threads can be overriden with the
+    # Test that the number of IO threads can be overridden with the
     # ARROW_IO_THREADS environment variable.
     code = """if 1:
         import pyarrow as pa
diff --git a/python/pyarrow/tests/test_pandas.py 
b/python/pyarrow/tests/test_pandas.py
index 10eb931592..342beaaeb5 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1343,7 +1343,7 @@ class TestConvertDateTimeLikeTypes:
         ex_values[1] = pd.NaT.value
 
         # date32 and date64 convert to [ms] in pandas v2, but
-        # in pandas v1 they are siliently coerced to [ns]
+        # in pandas v1 they are silently coerced to [ns]
         ex_datetime64ms = ex_values.astype('datetime64[ms]')
         expected_pandas = pd.DataFrame({'date32': ex_datetime64ms,
                                         'date64': ex_datetime64ms},
diff --git a/python/pyarrow/tests/test_scalars.py 
b/python/pyarrow/tests/test_scalars.py
index d7585d1415..74dee59558 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -633,7 +633,7 @@ def test_struct():
     assert s['y'].as_py() == 3.5
 
     with pytest.raises(KeyError):
-        s['non-existent']
+        s['nonexistent']
 
     s = pa.scalar(None, type=ty)
     assert list(s) == list(s.keys()) == ['x', 'y']
diff --git a/python/pyarrow/tests/test_substrait.py 
b/python/pyarrow/tests/test_substrait.py
index 5dda2cfcf0..d4fbfb7406 100644
--- a/python/pyarrow/tests/test_substrait.py
+++ b/python/pyarrow/tests/test_substrait.py
@@ -182,7 +182,7 @@ def has_function(fns, ext_file, fn_name):
 
 def test_get_supported_functions():
     supported_functions = pa._substrait.get_supported_functions()
-    # It probably doesn't make sense to exhaustively verfiy this list but
+    # It probably doesn't make sense to exhaustively verify this list but
     # we can check a sample aggregate and a sample non-aggregate entry
     assert has_function(supported_functions,
                         'functions_arithmetic.yaml', 'add')
diff --git a/python/pyarrow/tests/test_types.py 
b/python/pyarrow/tests/test_types.py
index 16343eae61..7600f1dd33 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -1019,7 +1019,7 @@ def test_key_value_metadata():
     assert md['b'] == b'beta'
     assert md.get_all('a') == [b'alpha', b'Alpha', b'ALPHA']
     assert md.get_all('b') == [b'beta']
-    assert md.get_all('unkown') == []
+    assert md.get_all('unknown') == []
 
     with pytest.raises(KeyError):
         md = pa.KeyValueMetadata([
diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py
index 62d1eb5baf..c8e376fefb 100644
--- a/python/pyarrow/tests/test_udf.py
+++ b/python/pyarrow/tests/test_udf.py
@@ -26,7 +26,7 @@ from pyarrow import compute as pc
 # UDFs are all tested with a dataset scan
 pytestmark = pytest.mark.dataset
 
-# For convience, most of the test here doesn't care about udf func docs
+# For convenience, most of the tests here don't care about udf func docs
 empty_udf_doc = {"summary": "", "description": ""}
 
 try:
@@ -302,7 +302,7 @@ def raising_func_fixture():
 @pytest.fixture(scope="session")
 def unary_vector_func_fixture():
     """
-    Reigster a vector function
+    Register a vector function
     """
     def pct_rank(ctx, x):
         # copy here to get around pandas 1.0 issue
@@ -319,7 +319,7 @@ def unary_vector_func_fixture():
 @pytest.fixture(scope="session")
 def struct_vector_func_fixture():
     """
-    Reigster a vector function that returns a struct array
+    Register a vector function that returns a struct array
     """
     def pivot(ctx, k, v, c):
         df = pa.RecordBatch.from_arrays([k, v, c], names=['k', 'v', 
'c']).to_pandas()
@@ -486,7 +486,7 @@ def test_function_doc_validation():
                                     func_doc, in_types,
                                     out_type)
 
-    # doc with no decription
+    # doc with no description
     func_doc = {
         "summary": "test summary"
     }
diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py
index 4f178aefc5..bb693cd663 100644
--- a/python/pyarrow/util.py
+++ b/python/pyarrow/util.py
@@ -42,7 +42,7 @@ def doc(*docstrings, **params):
     If the docstring is a template, it will be saved as a string.
     Otherwise, it will be saved as a callable and the docstring will be 
obtained via
     the __doc__ attribute.
-    This decorator can not be used on Cython classes due to a CPython 
constraint,
+    This decorator cannot be used on Cython classes due to a CPython 
constraint,
     which enforces the __doc__ attribute to be read-only.
     See https://github.com/python/cpython/issues/91309

(arrow) branch main updated: GH-38944: [Python] Fix spelling (#38945)

Reply via email to