This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6101d12676 GH-38944: [Python] Fix spelling (#38945)
6101d12676 is described below
commit 6101d12676f4cfac52822e3dc13034306a4bd83b
Author: Josh Soref <[email protected]>
AuthorDate: Fri Dec 1 03:30:13 2023 -0500
GH-38944: [Python] Fix spelling (#38945)
### Rationale for this change
### What changes are included in this PR?
Spelling fixes to python/
### Are these changes tested?
### Are there any user-facing changes?
* Closes: #38944
Authored-by: Josh Soref <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/CMakeLists.txt | 2 +-
python/examples/minimal_build/build_conda.sh | 2 +-
python/examples/minimal_build/build_venv.sh | 2 +-
python/pyarrow/_acero.pyx | 4 ++--
python/pyarrow/_compute.pyx | 8 +++----
python/pyarrow/_cuda.pyx | 2 +-
python/pyarrow/_dataset.pyx | 4 ++--
python/pyarrow/_dataset_parquet.pyx | 6 ++---
python/pyarrow/_parquet.pyx | 6 ++---
python/pyarrow/acero.py | 2 +-
python/pyarrow/array.pxi | 4 ++--
python/pyarrow/dataset.py | 8 +++----
python/pyarrow/includes/libarrow_python.pxd | 2 +-
python/pyarrow/interchange/column.py | 2 +-
python/pyarrow/interchange/from_dataframe.py | 14 +++++------
python/pyarrow/io.pxi | 8 +++----
python/pyarrow/parquet/core.py | 10 ++++----
python/pyarrow/src/arrow/python/datetime.h | 2 +-
python/pyarrow/src/arrow/python/inference.cc | 2 +-
python/pyarrow/src/arrow/python/python_to_arrow.cc | 6 ++---
python/pyarrow/src/arrow/python/udf.cc | 4 ++--
python/pyarrow/table.pxi | 28 +++++++++++-----------
python/pyarrow/tests/parquet/test_basic.py | 2 +-
python/pyarrow/tests/parquet/test_dataset.py | 4 ++--
python/pyarrow/tests/test_acero.py | 2 +-
python/pyarrow/tests/test_array.py | 2 +-
python/pyarrow/tests/test_compute.py | 4 ++--
python/pyarrow/tests/test_dataset.py | 4 ++--
python/pyarrow/tests/test_dataset_encryption.py | 2 +-
python/pyarrow/tests/test_fs.py | 14 +++++------
python/pyarrow/tests/test_io.py | 2 +-
python/pyarrow/tests/test_json.py | 2 +-
python/pyarrow/tests/test_misc.py | 2 +-
python/pyarrow/tests/test_pandas.py | 2 +-
python/pyarrow/tests/test_scalars.py | 2 +-
python/pyarrow/tests/test_substrait.py | 2 +-
python/pyarrow/tests/test_types.py | 2 +-
python/pyarrow/tests/test_udf.py | 8 +++----
python/pyarrow/util.py | 2 +-
39 files changed, 93 insertions(+), 93 deletions(-)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 529265235c..3f810d2727 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -1,5 +1,5 @@
# Licensed to the Apache Software Foundation (ASF) under one
-# or more cod ntributor license agreements. See the NOTICE file
+# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
diff --git a/python/examples/minimal_build/build_conda.sh
b/python/examples/minimal_build/build_conda.sh
index cd0030ac5f..72c3a5f9ea 100755
--- a/python/examples/minimal_build/build_conda.sh
+++ b/python/examples/minimal_build/build_conda.sh
@@ -91,7 +91,7 @@ popd
# Build and test Python library
pushd $ARROW_ROOT/python
-rm -rf build/ # remove any pesky pre-existing build directory
+rm -rf build/ # remove any pesky preexisting build directory
export
CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}
export PYARROW_BUILD_TYPE=Debug
diff --git a/python/examples/minimal_build/build_venv.sh
b/python/examples/minimal_build/build_venv.sh
index d0432049f7..3bd641d0e7 100755
--- a/python/examples/minimal_build/build_venv.sh
+++ b/python/examples/minimal_build/build_venv.sh
@@ -62,7 +62,7 @@ popd
# Build and test Python library
pushd $ARROW_ROOT/python
-rm -rf build/ # remove any pesky pre-existing build directory
+rm -rf build/ # remove any pesky preexisting build directory
export
CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH}}
export PYARROW_BUILD_TYPE=Debug
diff --git a/python/pyarrow/_acero.pyx b/python/pyarrow/_acero.pyx
index bb3196c86e..1c9b2f75c3 100644
--- a/python/pyarrow/_acero.pyx
+++ b/python/pyarrow/_acero.pyx
@@ -155,7 +155,7 @@ class ProjectNodeOptions(_ProjectNodeOptions):
List of expressions to evaluate against the source batch. This must
be scalar expressions.
names : list of str, optional
- List of names for each of the ouptut columns (same length as
+ List of names for each of the output columns (same length as
`expressions`). If `names` is not provided, the string
representations of exprs will be used.
"""
@@ -213,7 +213,7 @@ class AggregateNodeOptions(_AggregateNodeOptions):
Parameters
----------
aggregates : list of tuples
- Aggregations which will be applied to the targetted fields.
+ Aggregations which will be applied to the targeted fields.
Specified as a list of tuples, where each tuple is one aggregation
specification and consists of: aggregation target column(s) followed
by function name, aggregation function options object and the
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index 51dfdbf8eb..a267d53599 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -1390,7 +1390,7 @@ class TakeOptions(_TakeOptions):
----------
boundscheck : boolean, default True
Whether to check indices are within bounds. If False and an
- index is out of boundes, behavior is undefined (the process
+ index is out of bounds, behavior is undefined (the process
may crash).
"""
@@ -1468,7 +1468,7 @@ cdef class _StructFieldOptions(FunctionOptions):
def _set_options(self, indices):
if isinstance(indices, (list, tuple)) and not len(indices):
- # Allow empty indices; effecitively return same array
+ # Allow empty indices; effectively return same array
self.wrapped.reset(
new CStructFieldOptions(<vector[int]>indices))
return
@@ -2991,7 +2991,7 @@ def register_aggregate_function(func, function_name,
function_doc, in_types, out
This is often used with ordered or segmented aggregation where groups
can be emitted before accumulating all of the input data.
- Note that currently the size of any input column can not exceed 2 GB
+ Note that currently the size of any input column cannot exceed 2 GB
for a single segment (all groups combined).
Parameters
@@ -3076,7 +3076,7 @@ def register_tabular_function(func, function_name,
function_doc, in_types, out_t
UdfContext and returning a generator of struct arrays.
The in_types argument must be empty and the out_type argument
specifies a schema. Each struct array must have field types
- correspoding to the schema.
+ corresponding to the schema.
Parameters
----------
diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx
index dc7f42c10b..ba799a105e 100644
--- a/python/pyarrow/_cuda.pyx
+++ b/python/pyarrow/_cuda.pyx
@@ -493,7 +493,7 @@ cdef class CudaBuffer(Buffer):
raise ValueError(
'requested more to copy than available from '
'device buffer')
- # copy nbytes starting from position to new host buffeer
+ # copy nbytes starting from position to new host buffer
c_nbytes = nbytes
buf = allocate_buffer(c_nbytes, memory_pool=memory_pool,
resizable=resizable)
diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index d7d69965d0..029948a609 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -853,7 +853,7 @@ cdef class Dataset(_Weakrefable):
Which suffix to add to right column names. This prevents confusion
when the columns in left and right datasets have colliding names.
right_suffix : str, default None
- Which suffic to add to the left column names. This prevents
confusion
+ Which suffix to add to the left column names. This prevents
confusion
when the columns in left and right datasets have colliding names.
coalesce_keys : bool, default True
If the duplicated keys should be omitted from one of the sides
@@ -1016,7 +1016,7 @@ cdef class FileSystemDataset(Dataset):
elif not isinstance(root_partition, Expression):
raise TypeError(
"Argument 'root_partition' has incorrect type (expected "
- "Epression, got {0})".format(type(root_partition))
+ "Expression, got {0})".format(type(root_partition))
)
for fragment in fragments:
diff --git a/python/pyarrow/_dataset_parquet.pyx
b/python/pyarrow/_dataset_parquet.pyx
index bd4151624d..f83b78d933 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -415,7 +415,7 @@ cdef class ParquetFileFragment(FileFragment):
the Parquet RowGroup statistics).
schema : Schema, default None
Schema to use when filtering row groups. Defaults to the
- Fragment's phsyical schema
+ Fragment's physical schema
Returns
-------
@@ -450,7 +450,7 @@ cdef class ParquetFileFragment(FileFragment):
the Parquet RowGroup statistics).
schema : Schema, default None
Schema to use when filtering row groups. Defaults to the
- Fragment's phsyical schema
+ Fragment's physical schema
row_group_ids : list of ints
The row group IDs to include in the subset. Can only be specified
if `filter` is None.
@@ -688,7 +688,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions):
pre_buffer : bool, default True
If enabled, pre-buffer the raw Parquet data instead of issuing one
read per column chunk. This can improve performance on high-latency
- filesystems (e.g. S3, GCS) by coalesing and issuing file reads in
+ filesystems (e.g. S3, GCS) by coalescing and issuing file reads in
parallel using a background I/O thread pool.
Set to False if you want to prioritize minimal memory usage
over maximum speed.
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 737ba9d0a8..35344eb735 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -487,7 +487,7 @@ cdef class ColumnChunkMetaData(_Weakrefable):
@property
def total_compressed_size(self):
- """Compresssed size in bytes (int)."""
+ """Compressed size in bytes (int)."""
return self.metadata.total_compressed_size()
@property
@@ -1655,7 +1655,7 @@ cdef shared_ptr[WriterProperties]
_create_writer_properties(
if use_byte_stream_split:
if column_encoding is not None:
raise ValueError(
- "'use_byte_stream_split' can not be passed"
+ "'use_byte_stream_split' cannot be passed"
"together with 'column_encoding'")
else:
props.encoding(ParquetEncoding_BYTE_STREAM_SPLIT)
@@ -1667,7 +1667,7 @@ cdef shared_ptr[WriterProperties]
_create_writer_properties(
column_encoding[column] = 'BYTE_STREAM_SPLIT'
else:
raise ValueError(
- "'use_byte_stream_split' can not be passed"
+ "'use_byte_stream_split' cannot be passed"
"together with 'column_encoding'")
# column_encoding
diff --git a/python/pyarrow/acero.py b/python/pyarrow/acero.py
index 0609e45753..a5583c9e65 100644
--- a/python/pyarrow/acero.py
+++ b/python/pyarrow/acero.py
@@ -221,7 +221,7 @@ def _perform_join(join_type, left_operand, left_keys,
# Do not include right table keys. As they would lead to
duplicated keys
continue
else:
- # For all the other columns incude them as they are.
+ # For all the other columns include them as they are.
# Just recompute the suffixes that the join produced as the
projection
# would lose them otherwise.
if (
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 2e97503822..9d62bed51f 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -254,7 +254,7 @@ def array(object obj, type=None, mask=None, size=None,
from_pandas=None,
schema_capsule, array_capsule = obj.__arrow_c_array__(requested_type)
out_array = Array._import_from_c_capsule(schema_capsule, array_capsule)
if type is not None and out_array.type != type:
- # PyCapsule interface type coersion is best effort, so we need to
+ # PyCapsule interface type coercion is best effort, so we need to
# check the type of the returned array and cast if necessary
out_array = array.cast(type, safe=safe, memory_pool=memory_pool)
return out_array
@@ -3415,7 +3415,7 @@ cdef class RunEndEncodedArray(Array):
Find the physical offset of this REE array.
This is the offset of the run that contains the value of the first
- logical element of this array considering its offet.
+ logical element of this array considering its offset.
This function uses binary-search, so it has a O(log N) cost.
"""
diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index adf21814a2..9301a5fee5 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -169,7 +169,7 @@ def partitioning(schema=None, field_names=None, flavor=None,
Returns
-------
Partitioning or PartitioningFactory
- The partioning scheme
+ The partitioning scheme
Examples
--------
@@ -511,7 +511,7 @@ def parquet_dataset(metadata_path, schema=None,
filesystem=None, format=None,
partitioning=None, partition_base_dir=None):
"""
Create a FileSystemDataset from a `_metadata` file created via
- `pyarrrow.parquet.write_metadata`.
+ `pyarrow.parquet.write_metadata`.
Parameters
----------
@@ -534,7 +534,7 @@ def parquet_dataset(metadata_path, schema=None,
filesystem=None, format=None,
partitioning : Partitioning, PartitioningFactory, str, list of str
The partitioning scheme specified with the ``partitioning()``
function. A flavor string can be used as shortcut, and with a list of
- field names a DirectionaryPartitioning will be inferred.
+ field names a DirectoryPartitioning will be inferred.
partition_base_dir : str, optional
For the purposes of applying the partitioning, paths will be
stripped of the partition_base_dir. Files not matching the
@@ -630,7 +630,7 @@ RecordBatch or Table, iterable of RecordBatch,
RecordBatchReader, or URI
partitioning : Partitioning, PartitioningFactory, str, list of str
The partitioning scheme specified with the ``partitioning()``
function. A flavor string can be used as shortcut, and with a list of
- field names a DirectionaryPartitioning will be inferred.
+ field names a DirectoryPartitioning will be inferred.
partition_base_dir : str, optional
For the purposes of applying the partitioning, paths will be
stripped of the partition_base_dir. Files not matching the
diff --git a/python/pyarrow/includes/libarrow_python.pxd
b/python/pyarrow/includes/libarrow_python.pxd
index b8a3041796..e3179062a1 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -263,7 +263,7 @@ cdef extern from "arrow/python/common.h" namespace
"arrow::py":
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
cdef cppclass SharedPtrNoGIL[T](shared_ptr[T]):
- # This looks like the only way to satsify both Cython 2 and Cython 3
+ # This looks like the only way to satisfy both Cython 2 and Cython 3
SharedPtrNoGIL& operator=(...)
cdef cppclass UniquePtrNoGIL[T, DELETER=*](unique_ptr[T, DELETER]):
UniquePtrNoGIL& operator=(...)
diff --git a/python/pyarrow/interchange/column.py
b/python/pyarrow/interchange/column.py
index eaf7834d5b..e609e469b0 100644
--- a/python/pyarrow/interchange/column.py
+++ b/python/pyarrow/interchange/column.py
@@ -372,7 +372,7 @@ class _PyArrowColumn:
"""
# In case of no missing values, we need to set ColumnNullType to
# non nullable as in the current __dataframe__ protocol bit/byte masks
- # can not be None
+ # cannot be None
if self.null_count == 0:
return ColumnNullType.NON_NULLABLE, None
else:
diff --git a/python/pyarrow/interchange/from_dataframe.py
b/python/pyarrow/interchange/from_dataframe.py
index 3767b18f2a..fcaec41e3d 100644
--- a/python/pyarrow/interchange/from_dataframe.py
+++ b/python/pyarrow/interchange/from_dataframe.py
@@ -86,20 +86,20 @@ def from_dataframe(df: DataFrameObject, allow_copy=True) ->
pa.Table:
>>> import pandas as pd
>>> df = pd.DataFrame({
- ... "n_atendees": [100, 10, 1],
+ ... "n_attendees": [100, 10, 1],
... "country": ["Italy", "Spain", "Slovenia"],
... })
>>> df
- n_atendees country
- 0 100 Italy
- 1 10 Spain
- 2 1 Slovenia
+ n_attendees country
+ 0 100 Italy
+ 1 10 Spain
+ 2 1 Slovenia
>>> from_dataframe(df)
pyarrow.Table
- n_atendees: int64
+ n_attendees: int64
country: large_string
----
- n_atendees: [[100,10,1]]
+ n_attendees: [[100,10,1]]
country: [["Italy","Spain","Slovenia"]]
"""
if isinstance(df, pa.Table):
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 24b4e003a2..3086845efa 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1438,7 +1438,7 @@ cdef class Buffer(_Weakrefable):
def __getreadbuffer__(self, Py_ssize_t idx, void **p):
if idx != 0:
- raise SystemError("accessing non-existent buffer segment")
+ raise SystemError("accessing nonexistent buffer segment")
if p != NULL:
p[0] = <void*> self.buffer.get().data()
return self.size
@@ -1447,7 +1447,7 @@ cdef class Buffer(_Weakrefable):
if not self.buffer.get().is_mutable():
raise SystemError("trying to write an immutable buffer")
if idx != 0:
- raise SystemError("accessing non-existent buffer segment")
+ raise SystemError("accessing nonexistent buffer segment")
if p != NULL:
p[0] = <void*> self.buffer.get().data()
return self.size
@@ -1629,7 +1629,7 @@ cdef class CompressedInputStream(NativeFile):
Examples
--------
- Create an ouput stream wich compresses the data:
+ Create an output stream which compresses the data:
>>> import pyarrow as pa
>>> data = b"Compressed stream"
@@ -1686,7 +1686,7 @@ cdef class CompressedOutputStream(NativeFile):
Examples
--------
- Create an ouput stream wich compresses the data:
+ Create an output stream which compresses the data:
>>> import pyarrow as pa
>>> data = b"Compressed stream"
diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 096e960384..db22eb3293 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -828,7 +828,7 @@ use_byte_stream_split : bool or list, default False
and should be combined with a compression codec.
column_encoding : string or dict, default None
Specify the encoding scheme on a per column basis.
- Can only be used when when ``use_dictionary`` is set to False, and
+ Can only be used when ``use_dictionary`` is set to False, and
cannot be used in combination with ``use_byte_stream_split``.
Currently supported values: {'PLAIN', 'BYTE_STREAM_SPLIT',
'DELTA_BINARY_PACKED', 'DELTA_LENGTH_BYTE_ARRAY', 'DELTA_BYTE_ARRAY'}.
@@ -1906,7 +1906,7 @@ Examples
warnings.warn(
"Specifying the 'schema' argument with 'use_legacy_dataset="
"True' is deprecated as of pyarrow 8.0.0. You can still "
- "specify it in combination with 'use_legacy_dataet=False', "
+ "specify it in combination with 'use_legacy_dataset=False', "
"but in that case you need to specify a pyarrow.Schema "
"instead of a ParquetSchema.",
FutureWarning, stacklevel=2)
@@ -3272,13 +3272,13 @@ def write_to_dataset(table, root_path,
partition_cols=None,
passed, the filename will consist of a uuid.
This option is only supported for use_legacy_dataset=True.
When use_legacy_dataset=None and this option is specified,
- use_legacy_datase will be set to True.
+ use_legacy_dataset will be set to True.
filesystem : FileSystem, default None
If nothing passed, will be inferred based on path.
Path will try to be found in the local on-disk filesystem otherwise
it will be parsed as an URI to determine the filesystem.
use_legacy_dataset : bool
- Default is False. Set to True to use the the legacy behaviour
+ Default is False. Set to True to use the legacy behaviour
(this option is deprecated, and the legacy implementation will be
removed in a future version). The legacy implementation still
supports the `partition_filename_cb` keyword but is less efficient
@@ -3386,7 +3386,7 @@ def write_to_dataset(table, root_path,
partition_cols=None,
else:
use_legacy_dataset = False
- # Check for conflicting kewords
+ # Check for conflicting keywords
msg_confl_0 = (
"The '{0}' argument is not supported by use_legacy_dataset={2}. "
"Use only '{1}' instead."
diff --git a/python/pyarrow/src/arrow/python/datetime.h
b/python/pyarrow/src/arrow/python/datetime.h
index 327a61f3de..7346d6bc67 100644
--- a/python/pyarrow/src/arrow/python/datetime.h
+++ b/python/pyarrow/src/arrow/python/datetime.h
@@ -220,7 +220,7 @@ ARROW_PYTHON_EXPORT
Result<PyObject*> MonthDayNanoIntervalArrayToPyList(
const MonthDayNanoIntervalArray& array);
-/// \brief Convert the Scalar obect to a pyarrow.MonthDayNano (or None if
+/// \brief Convert the Scalar object to a pyarrow.MonthDayNano (or None if
/// it isn't valid).
ARROW_PYTHON_EXPORT
Result<PyObject*> MonthDayNanoIntervalScalarToPyObject(
diff --git a/python/pyarrow/src/arrow/python/inference.cc
b/python/pyarrow/src/arrow/python/inference.cc
index 3407b32720..9537aec574 100644
--- a/python/pyarrow/src/arrow/python/inference.cc
+++ b/python/pyarrow/src/arrow/python/inference.cc
@@ -623,7 +623,7 @@ class TypeInferrer {
// XXX(wesm): In ARROW-4324 I added accounting to check whether
// all of the non-null values have NumPy dtypes, but the
- // total_count not not being properly incremented here
+ // total_count not being properly incremented here
++(*list_inferrer_).total_count_;
return list_inferrer_->VisitDType(dtype, keep_going);
}
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc
b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index e474924998..23b92598e3 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -121,7 +121,7 @@ const MonthDayNanoAttrData
MonthDayNanoTraits<MonthDayNanoField::kNanoseconds>::
{"minutes", /*minutes_in_hours=*/60},
{"seconds", /*seconds_in_minute=*/60},
{"milliseconds", /*milliseconds_in_seconds*/ 1000},
- {"microseconds", /*microseconds_in_millseconds=*/1000},
+ {"microseconds", /*microseconds_in_milliseconds=*/1000},
{"nanoseconds", /*nanoseconds_in_microseconds=*/1000},
{nullptr, 0}};
@@ -481,7 +481,7 @@ class PyValue {
// The binary-like intermediate representation is PyBytesView because it
keeps temporary
// python objects alive (non-contiguous memoryview) and stores whether the
original
- // object was unicode encoded or not, which is used for unicode -> bytes
coersion if
+ // object was unicode encoded or not, which is used for unicode -> bytes
coercion if
// there is a non-unicode object observed.
static Status Convert(const BaseBinaryType*, const O&, I obj, PyBytesView&
view) {
@@ -819,7 +819,7 @@ class PyListConverter : public ListConverter<T,
PyConverter, PyConverterTrait> {
protected:
Status ValidateBuilder(const MapType*) {
if (this->list_builder_->key_builder()->null_count() > 0) {
- return Status::Invalid("Invalid Map: key field can not contain null
values");
+ return Status::Invalid("Invalid Map: key field cannot contain null
values");
} else {
return Status::OK();
}
diff --git a/python/pyarrow/src/arrow/python/udf.cc
b/python/pyarrow/src/arrow/python/udf.cc
index f7761a9277..e9b72a2592 100644
--- a/python/pyarrow/src/arrow/python/udf.cc
+++ b/python/pyarrow/src/arrow/python/udf.cc
@@ -275,7 +275,7 @@ struct PythonUdfHashAggregatorImpl : public
HashUdfAggregator {
}
}
- // same as ApplyGrouping in parition.cc
+ // same as ApplyGrouping in partition.cc
// replicated the code here to avoid complicating the dependencies
static Result<RecordBatchVector> ApplyGroupings(
const ListArray& groupings, const std::shared_ptr<RecordBatch>& batch) {
@@ -600,7 +600,7 @@ Status RegisterScalarAggregateFunction(PyObject* function,
UdfWrapperCallback cb
/// \param options User provided udf options
UdfOptions AdjustForHashAggregate(const UdfOptions& options) {
UdfOptions hash_options;
- // Append hash_ before the function name to seperate from the scalar
+ // Append hash_ before the function name to separate from the scalar
// version
hash_options.func_name = "hash_" + options.func_name;
// Extend input types with group id. Group id is appended by the group
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 0fa913a219..bbed789553 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -449,7 +449,7 @@ cdef class ChunkedArray(_PandasConvertible):
>>> import pyarrow as pa
>>> n_legs = pa.chunked_array([[2, 2, 4], [4, 5, 100]])
>>> animals = pa.chunked_array((
- ... ["Flamingo", "Parot", "Dog"],
+ ... ["Flamingo", "Parrot", "Dog"],
... ["Horse", "Brittle stars", "Centipede"]
... ))
>>> n_legs.equals(n_legs)
@@ -584,7 +584,7 @@ cdef class ChunkedArray(_PandasConvertible):
--------
>>> import pyarrow as pa
>>> animals = pa.chunked_array((
- ... ["Flamingo", "Parot", "Dog"],
+ ... ["Flamingo", "Parrot", "Dog"],
... ["Horse", "Brittle stars", "Centipede"]
... ))
>>> animals.dictionary_encode()
@@ -594,7 +594,7 @@ cdef class ChunkedArray(_PandasConvertible):
-- dictionary:
[
"Flamingo",
- "Parot",
+ "Parrot",
"Dog",
"Horse",
"Brittle stars",
@@ -610,7 +610,7 @@ cdef class ChunkedArray(_PandasConvertible):
-- dictionary:
[
"Flamingo",
- "Parot",
+ "Parrot",
"Dog",
"Horse",
"Brittle stars",
@@ -1127,7 +1127,7 @@ cdef class ChunkedArray(_PandasConvertible):
Examples
--------
>>> import pyarrow as pa
- >>> arr_1 = pa.array(["Flamingo", "Parot", "Dog"]).dictionary_encode()
+ >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode()
>>> arr_2 = pa.array(["Horse", "Brittle stars",
"Centipede"]).dictionary_encode()
>>> c_arr = pa.chunked_array([arr_1, arr_2])
>>> c_arr
@@ -1137,7 +1137,7 @@ cdef class ChunkedArray(_PandasConvertible):
-- dictionary:
[
"Flamingo",
- "Parot",
+ "Parrot",
"Dog"
]
-- indices:
@@ -1167,7 +1167,7 @@ cdef class ChunkedArray(_PandasConvertible):
-- dictionary:
[
"Flamingo",
- "Parot",
+ "Parrot",
"Dog",
"Horse",
"Brittle stars",
@@ -1183,7 +1183,7 @@ cdef class ChunkedArray(_PandasConvertible):
-- dictionary:
[
"Flamingo",
- "Parot",
+ "Parrot",
"Dog",
"Horse",
"Brittle stars",
@@ -2804,7 +2804,7 @@ cdef class RecordBatch(_Tabular):
>>> animals = pa.array(["Flamingo", "Parrot", "Dog", "Horse", "Brittle
stars", "Centipede"])
>>> names = ["n_legs", "animals"]
- Construct a RecordBartch from pyarrow Arrays using names:
+ Construct a RecordBatch from pyarrow Arrays using names:
>>> pa.RecordBatch.from_arrays([n_legs, animals], names=names)
pyarrow.RecordBatch
@@ -2822,7 +2822,7 @@ cdef class RecordBatch(_Tabular):
4 5 Brittle stars
5 100 Centipede
- Construct a RecordBartch from pyarrow Arrays using schema:
+ Construct a RecordBatch from pyarrow Arrays using schema:
>>> my_schema = pa.schema([
... pa.field('n_legs', pa.int64()),
@@ -3659,7 +3659,7 @@ cdef class Table(_Tabular):
Examples
--------
>>> import pyarrow as pa
- >>> arr_1 = pa.array(["Flamingo", "Parot", "Dog"]).dictionary_encode()
+ >>> arr_1 = pa.array(["Flamingo", "Parrot", "Dog"]).dictionary_encode()
>>> arr_2 = pa.array(["Horse", "Brittle stars",
"Centipede"]).dictionary_encode()
>>> c_arr = pa.chunked_array([arr_1, arr_2])
>>> table = pa.table([c_arr], names=["animals"])
@@ -3668,7 +3668,7 @@ cdef class Table(_Tabular):
animals: dictionary<values=string, indices=int32, ordered=0>
----
animals: [ -- dictionary:
- ["Flamingo","Parot","Dog"] -- indices:
+ ["Flamingo","Parrot","Dog"] -- indices:
[0,1,2], -- dictionary:
["Horse","Brittle stars","Centipede"] -- indices:
[0,1,2]]
@@ -3680,9 +3680,9 @@ cdef class Table(_Tabular):
animals: dictionary<values=string, indices=int32, ordered=0>
----
animals: [ -- dictionary:
- ["Flamingo","Parot","Dog","Horse","Brittle stars","Centipede"] --
indices:
+ ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] --
indices:
[0,1,2], -- dictionary:
- ["Flamingo","Parot","Dog","Horse","Brittle stars","Centipede"] --
indices:
+ ["Flamingo","Parrot","Dog","Horse","Brittle stars","Centipede"] --
indices:
[3,4,5]]
"""
cdef:
diff --git a/python/pyarrow/tests/parquet/test_basic.py
b/python/pyarrow/tests/parquet/test_basic.py
index 26c52b1cc5..83e6ebeb7a 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -661,7 +661,7 @@ def test_write_error_deletes_incomplete_file(tempdir):
@parametrize_legacy_dataset
def test_read_non_existent_file(tempdir, use_legacy_dataset):
- path = 'non-existent-file.parquet'
+ path = 'nonexistent-file.parquet'
try:
pq.read_table(path, use_legacy_dataset=use_legacy_dataset)
except Exception as e:
diff --git a/python/pyarrow/tests/parquet/test_dataset.py
b/python/pyarrow/tests/parquet/test_dataset.py
index be27c71b81..a9e99d5d65 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1622,7 +1622,7 @@ def test_read_table_schema(tempdir):
expected = pa.table({'a': [1, 2, 3]}, schema=schema)
assert result.equals(expected)
- # reading multiple fiels
+ # reading multiple files
result = pq.read_table(tempdir, schema=schema)
expected = pa.table({'a': [1, 2, 3, 1, 2, 3]}, schema=schema)
assert result.equals(expected)
@@ -1796,7 +1796,7 @@ def
test_parquet_write_to_dataset_deprecated_properties(tempdir):
@pytest.mark.dataset
-def test_parquet_write_to_dataset_unsupported_keywards_in_legacy(tempdir):
+def test_parquet_write_to_dataset_unsupported_keywords_in_legacy(tempdir):
table = pa.table({'a': [1, 2, 3]})
path = tempdir / 'data.parquet'
diff --git a/python/pyarrow/tests/test_acero.py
b/python/pyarrow/tests/test_acero.py
index 988e9b6e31..a436060130 100644
--- a/python/pyarrow/tests/test_acero.py
+++ b/python/pyarrow/tests/test_acero.py
@@ -265,7 +265,7 @@ def test_order_by():
expected = pa.table({"a": [3, 2, 4, 1], "b": [None, 3, 2, 1]})
assert result.equals(expected)
- # emtpy ordering
+ # empty ordering
ord_opts = OrderByNodeOptions([])
decl = Declaration.from_sequence([table_source, Declaration("order_by",
ord_opts)])
with pytest.raises(
diff --git a/python/pyarrow/tests/test_array.py
b/python/pyarrow/tests/test_array.py
index 2f9727922b..599d15d023 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -989,7 +989,7 @@ def test_list_array_types_from_arrays_fail(list_array_type,
list_type_factory):
reconstructed_arr = list_array_type.from_arrays(arr.offsets, arr.values)
assert reconstructed_arr.to_pylist() == [[0], [], [0, None], [0]]
- # Manually specifiying offsets (with nulls) is same as mask at top level
+ # Manually specifying offsets (with nulls) is same as mask at top level
reconstructed_arr = list_array_type.from_arrays(offsets, arr.values)
assert arr == reconstructed_arr
reconstructed_arr = list_array_type.from_arrays(arr.offsets,
diff --git a/python/pyarrow/tests/test_compute.py
b/python/pyarrow/tests/test_compute.py
index 4b2144d702..067d96a821 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -2385,7 +2385,7 @@ def _check_temporal_rounding(ts, values, unit):
# Check rounding with calendar_based_origin=True.
# Note: rounding to month is not supported in Pandas so we can't
- # approximate this functionallity and exclude unit == "day".
+ # approximate this functionality and exclude unit == "day".
if unit != "day":
options = pc.RoundTemporalOptions(
value, unit, calendar_based_origin=True)
@@ -3501,7 +3501,7 @@ def test_expression_call_function():
assert str(pc.add(field, 1)) == "add(field, 1)"
assert str(pc.add(field, pa.scalar(1))) == "add(field, 1)"
- # Invalid pc.scalar input gives original erorr message
+ # Invalid pc.scalar input gives original error message
msg = "only other expressions allowed as arguments"
with pytest.raises(TypeError, match=msg):
pc.add(field, object)
diff --git a/python/pyarrow/tests/test_dataset.py
b/python/pyarrow/tests/test_dataset.py
index c6967326b3..d5e7015a5d 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2249,7 +2249,7 @@ def test_construct_from_list_of_files(tempdir,
dataset_reader):
@pytest.mark.parquet
def test_construct_from_list_of_mixed_paths_fails(mockfs):
- # isntantiate from a list of mixed paths
+ # instantiate from a list of mixed paths
files = [
'subdir/1/xxx/file0.parquet',
'subdir/1/xxx/doesnt-exist.parquet',
@@ -2260,7 +2260,7 @@ def test_construct_from_list_of_mixed_paths_fails(mockfs):
@pytest.mark.parquet
def test_construct_from_mixed_child_datasets(mockfs):
- # isntantiate from a list of mixed paths
+ # instantiate from a list of mixed paths
a = ds.dataset(['subdir/1/xxx/file0.parquet',
'subdir/2/yyy/file1.parquet'], filesystem=mockfs)
b = ds.dataset('subdir', filesystem=mockfs)
diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py
index b5d6f510db..d25b22990a 100644
--- a/python/pyarrow/tests/test_dataset_encryption.py
+++ b/python/pyarrow/tests/test_dataset_encryption.py
@@ -123,7 +123,7 @@ def test_dataset_encryption_decryption():
filesystem=mockfs,
)
- # read without descryption config -> should error is dataset was properly encrypted
+ # read without decryption config -> should error is dataset was properly encrypted
pformat = pa.dataset.ParquetFileFormat()
with pytest.raises(IOError, match=r"no decryption"):
ds.dataset("sample_dataset", format=pformat, filesystem=mockfs)
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index c540bf9681..1002e13471 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1303,12 +1303,12 @@ def test_s3_proxy_options(monkeypatch, pickle_module):
# Missing port
with pytest.raises(KeyError):
S3FileSystem(proxy_options={'scheme': 'http', 'host': 'localhost'})
- # Invalid proxy URI (invalid scheme htttps)
+ # Invalid proxy URI (invalid scheme httpsB)
with pytest.raises(pa.ArrowInvalid):
- S3FileSystem(proxy_options='htttps://localhost:9000')
- # Invalid proxy_options dict (invalid scheme htttps)
+ S3FileSystem(proxy_options='httpsB://localhost:9000')
+ # Invalid proxy_options dict (invalid scheme httpA)
with pytest.raises(pa.ArrowInvalid):
- S3FileSystem(proxy_options={'scheme': 'htttp', 'host': 'localhost',
+ S3FileSystem(proxy_options={'scheme': 'httpA', 'host': 'localhost',
'port': 8999})
@@ -1690,11 +1690,11 @@ def test_s3_real_aws_region_selection():
assert fs.region == 'us-east-2'
# Reading from the wrong region may still work for public buckets...
- # Non-existent bucket (hopefully, otherwise need to fix this test)
+ # Nonexistent bucket (hopefully, otherwise need to fix this test)
with pytest.raises(IOError, match="Bucket '.*' not found"):
- FileSystem.from_uri('s3://x-arrow-non-existent-bucket')
+ FileSystem.from_uri('s3://x-arrow-nonexistent-bucket')
fs, path = FileSystem.from_uri(
- 's3://x-arrow-non-existent-bucket?region=us-east-3')
+ 's3://x-arrow-nonexistent-bucket?region=us-east-3')
assert fs.region == 'us-east-3'
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 9609e4066a..071962af29 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -229,7 +229,7 @@ def test_python_file_read_buffer():
buf = f.read_buffer(length)
assert len(buf) == length
assert memoryview(buf).tobytes() == dst_buf[:length]
- # buf should point to the same memory, so modyfing it
+ # buf should point to the same memory, so modifying it
memoryview(buf)[0] = ord(b'x')
# should modify the original
assert dst_buf[0] == ord(b'x')
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index b8c1e874fc..a0a6174266 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -226,7 +226,7 @@ class BaseTestJSONRead:
assert table.num_columns == 0
assert table.num_rows == 2
- def test_reconcile_accross_blocks(self):
+ def test_reconcile_across_blocks(self):
# ARROW-12065: reconciling inferred types across blocks
first_row = b'{ }\n'
read_options = ReadOptions(block_size=len(first_row))
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index a48ac0c3cd..8b8c50882b 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -57,7 +57,7 @@ def test_io_thread_count():
def test_env_var_io_thread_count():
- # Test that the number of IO threads can be overriden with the
+ # Test that the number of IO threads can be overridden with the
# ARROW_IO_THREADS environment variable.
code = """if 1:
import pyarrow as pa
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 10eb931592..342beaaeb5 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1343,7 +1343,7 @@ class TestConvertDateTimeLikeTypes:
ex_values[1] = pd.NaT.value
# date32 and date64 convert to [ms] in pandas v2, but
- # in pandas v1 they are siliently coerced to [ns]
+ # in pandas v1 they are silently coerced to [ns]
ex_datetime64ms = ex_values.astype('datetime64[ms]')
expected_pandas = pd.DataFrame({'date32': ex_datetime64ms,
'date64': ex_datetime64ms},
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index d7585d1415..74dee59558 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -633,7 +633,7 @@ def test_struct():
assert s['y'].as_py() == 3.5
with pytest.raises(KeyError):
- s['non-existent']
+ s['nonexistent']
s = pa.scalar(None, type=ty)
assert list(s) == list(s.keys()) == ['x', 'y']
diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py
index 5dda2cfcf0..d4fbfb7406 100644
--- a/python/pyarrow/tests/test_substrait.py
+++ b/python/pyarrow/tests/test_substrait.py
@@ -182,7 +182,7 @@ def has_function(fns, ext_file, fn_name):
def test_get_supported_functions():
supported_functions = pa._substrait.get_supported_functions()
- # It probably doesn't make sense to exhaustively verfiy this list but
+ # It probably doesn't make sense to exhaustively verify this list but
# we can check a sample aggregate and a sample non-aggregate entry
assert has_function(supported_functions,
'functions_arithmetic.yaml', 'add')
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index 16343eae61..7600f1dd33 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -1019,7 +1019,7 @@ def test_key_value_metadata():
assert md['b'] == b'beta'
assert md.get_all('a') == [b'alpha', b'Alpha', b'ALPHA']
assert md.get_all('b') == [b'beta']
- assert md.get_all('unkown') == []
+ assert md.get_all('unknown') == []
with pytest.raises(KeyError):
md = pa.KeyValueMetadata([
diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py
index 62d1eb5baf..c8e376fefb 100644
--- a/python/pyarrow/tests/test_udf.py
+++ b/python/pyarrow/tests/test_udf.py
@@ -26,7 +26,7 @@ from pyarrow import compute as pc
# UDFs are all tested with a dataset scan
pytestmark = pytest.mark.dataset
-# For convience, most of the test here doesn't care about udf func docs
+# For convenience, most of the test here doesn't care about udf func docs
empty_udf_doc = {"summary": "", "description": ""}
try:
@@ -302,7 +302,7 @@ def raising_func_fixture():
@pytest.fixture(scope="session")
def unary_vector_func_fixture():
"""
- Reigster a vector function
+ Register a vector function
"""
def pct_rank(ctx, x):
# copy here to get around pandas 1.0 issue
@@ -319,7 +319,7 @@ def unary_vector_func_fixture():
@pytest.fixture(scope="session")
def struct_vector_func_fixture():
"""
- Reigster a vector function that returns a struct array
+ Register a vector function that returns a struct array
"""
def pivot(ctx, k, v, c):
df = pa.RecordBatch.from_arrays([k, v, c], names=['k', 'v', 'c']).to_pandas()
@@ -486,7 +486,7 @@ def test_function_doc_validation():
func_doc, in_types,
out_type)
- # doc with no decription
+ # doc with no description
func_doc = {
"summary": "test summary"
}
diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py
index 4f178aefc5..bb693cd663 100644
--- a/python/pyarrow/util.py
+++ b/python/pyarrow/util.py
@@ -42,7 +42,7 @@ def doc(*docstrings, **params):
If the docstring is a template, it will be saved as a string.
Otherwise, it will be saved as a callable and the docstring will be obtained via
the __doc__ attribute.
- This decorator can not be used on Cython classes due to a CPython constraint,
+ This decorator cannot be used on Cython classes due to a CPython constraint,
which enforces the __doc__ attribute to be read-only.
See https://github.com/python/cpython/issues/91309