This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1b9fe98be6 GH-38968: [C++] Fix spelling (dataset) (#38969)
1b9fe98be6 is described below
commit 1b9fe98be6338d4fae917d271c40261b25118f45
Author: Josh Soref <[email protected]>
AuthorDate: Fri Dec 1 03:25:59 2023 -0500
GH-38968: [C++] Fix spelling (dataset) (#38969)
### Rationale for this change
### What changes are included in this PR?
Spelling fixes to cpp/src/arrow/dataset/
### Are these changes tested?
### Are there any user-facing changes?
* Closes: #38968
Authored-by: Josh Soref <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
cpp/src/arrow/dataset/dataset.h | 2 +-
cpp/src/arrow/dataset/file_json.cc | 4 ++--
cpp/src/arrow/dataset/file_json_test.cc | 2 +-
cpp/src/arrow/dataset/file_parquet.h | 2 +-
cpp/src/arrow/dataset/scan_node.cc | 4 ++--
cpp/src/arrow/dataset/scanner.h | 4 ++--
cpp/src/arrow/dataset/scanner_test.cc | 10 +++++-----
cpp/src/arrow/dataset/subtree_test.cc | 2 +-
cpp/src/arrow/dataset/test_util_internal.h | 2 +-
9 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 39936fbd7b..1cdd92d5c4 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -398,7 +398,7 @@ class ARROW_DS_EXPORT Dataset : public
std::enable_shared_from_this<Dataset> {
///
/// Currently, `executor` is always the same as
`internal::GetCPUThreadPool()`,
/// which means the results from the underlying fragment generator will be
- /// transfered to the default CPU thread pool. The generator itself is
+ /// transferred to the default CPU thread pool. The generator itself is
/// offloaded to run on the default IO thread pool.
virtual Result<FragmentGenerator> GetFragmentsAsyncImpl(
compute::Expression predicate, arrow::internal::Executor* executor);
diff --git a/cpp/src/arrow/dataset/file_json.cc
b/cpp/src/arrow/dataset/file_json.cc
index 6ca8405f03..1d545c3969 100644
--- a/cpp/src/arrow/dataset/file_json.cc
+++ b/cpp/src/arrow/dataset/file_json.cc
@@ -324,8 +324,8 @@ Result<RecordBatchGenerator> MakeBatchGenerator(
const std::shared_ptr<FileFragment>& file) {
ARROW_ASSIGN_OR_RAISE(auto future, DoOpenReader(file->source(), format,
scan_options));
auto maybe_reader = future.result();
- // Defer errors that occured during reader instantiation since they're
likely related to
- // batch-processing.
+ // Defer errors that occurred during reader instantiation since they're
likely related
+ // to batch-processing.
if (!maybe_reader.ok()) {
return
MakeFailingGenerator<std::shared_ptr<RecordBatch>>(maybe_reader.status());
}
diff --git a/cpp/src/arrow/dataset/file_json_test.cc
b/cpp/src/arrow/dataset/file_json_test.cc
index 3b0647d28f..9626e8a550 100644
--- a/cpp/src/arrow/dataset/file_json_test.cc
+++ b/cpp/src/arrow/dataset/file_json_test.cc
@@ -162,7 +162,7 @@ std::shared_ptr<FileSource> ToFileSource(std::string json) {
return std::make_shared<FileSource>(Buffer::FromString(std::move(json)));
}
-// Mixin for additional JSON-specific tests, compatibile with both format APIs.
+// Mixin for additional JSON-specific tests, compatible with both format APIs.
template <typename T>
class JsonScanMixin {
public:
diff --git a/cpp/src/arrow/dataset/file_parquet.h
b/cpp/src/arrow/dataset/file_parquet.h
index 5132a805bb..f527ce5d70 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -335,7 +335,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public
DatasetFactory {
/// \brief Create a ParquetDatasetFactory from a metadata source.
///
/// Similar to the previous Make definition, but the metadata can be a Buffer
- /// and the base_path is explicited instead of inferred from the metadata
+ /// and the base_path is explicit instead of inferred from the metadata
/// path.
///
/// \param[in] metadata source to open the metadata parquet file from
diff --git a/cpp/src/arrow/dataset/scan_node.cc
b/cpp/src/arrow/dataset/scan_node.cc
index 5ed6eee5dd..c25c5b70ae 100644
--- a/cpp/src/arrow/dataset/scan_node.cc
+++ b/cpp/src/arrow/dataset/scan_node.cc
@@ -94,7 +94,7 @@ Future<AsyncGenerator<std::shared_ptr<Fragment>>>
GetFragments(
/// fragment on disk actually had a column x, and the value was not 7, then we
will prefer
/// the guarantee in this invalid case.
///
-/// Ths next step is to fetch the metadata for the fragment. For some formats
(e.g.
+/// The next step is to fetch the metadata for the fragment. For some formats
(e.g.
/// CSV) this may be quite simple (get the size of the file). For other
formats (e.g.
/// parquet) this is more involved and requires reading data. There is one
metadata
/// io-task per fragment. The metadata io-task creates an
AsyncGenerator<RecordBatch>
@@ -150,7 +150,7 @@ class ScanNode : public acero::ExecNode, public
acero::TracedNode {
}
if (normalized.filter.call() && normalized.filter.IsBound()) {
- // There is no easy way to make sure a filter was bound agaisnt the same
+ // There is no easy way to make sure a filter was bound against the same
// function registry as the one in ctx so we just require it to be
unbound
// FIXME - Do we care if it was bound to a different function registry?
return Status::Invalid("Scan filter must be unbound");
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 5479a0d9db..4479158ff2 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -141,7 +141,7 @@ struct ARROW_DS_EXPORT ScanOptions {
/// Scan-specific options, which can be changed between scans of the same
dataset.
///
/// A dataset consists of one or more individual fragments. A fragment is
anything
-/// that is indepedently scannable, often a file.
+/// that is independently scannable, often a file.
///
/// Batches from all fragments will be converted to a single schema. This
unified
/// schema is referred to as the "dataset schema" and is the output schema for
@@ -230,7 +230,7 @@ struct ARROW_DS_EXPORT ScanV2Options : public
acero::ExecNodeOptions {
/// for example, if scanning a parquet file that has batches with 100MiB of
data
/// then the actual readahead will be at least 100MiB
///
- /// Set to 0 to disable readhead. When disabled, the scanner will read the
+ /// Set to 0 to disable readahead. When disabled, the scanner will read the
/// dataset one batch at a time
///
/// This limit applies across all fragments. If the limit is 32MiB and the
diff --git a/cpp/src/arrow/dataset/scanner_test.cc
b/cpp/src/arrow/dataset/scanner_test.cc
index cde3a725c4..fccfc80032 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -2591,7 +2591,7 @@ TEST(ScanNode, MinimalEndToEnd) {
// for now, specify the projection as the full project expression
(eventually this can
// just be a list of materialized field names)
compute::Expression a_times_2 = call("multiply", {field_ref("a"),
literal(2)});
- // set the projection such that required project experssion field is
included as a
+ // set the projection such that required project expression field is
included as a
// field_ref
compute::Expression project_expr = field_ref("a");
options->projection =
@@ -2686,7 +2686,7 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) {
// for now, specify the projection as the full project expression
(eventually this can
// just be a list of materialized field names)
compute::Expression a_times_2 = call("multiply", {field_ref("a"),
literal(2)});
- // set the projection such that required project experssion field is
included as a
+ // set the projection such that required project expression field is
included as a
// field_ref
compute::Expression project_expr = field_ref("a");
options->projection =
@@ -2778,7 +2778,7 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) {
// for now, specify the projection as the full project expression
(eventually this can
// just be a list of materialized field names)
compute::Expression a_times_2 = call("multiply", {field_ref("a"),
literal(2)});
- // set the projection such that required project experssion field is
included as a
+ // set the projection such that required project expression field is
included as a
// field_ref
compute::Expression a = field_ref("a");
compute::Expression b = field_ref("b");
@@ -2888,12 +2888,12 @@ TEST(ScanNode, OnlyLoadProjectedFields) {
{acero::Declaration({"scan", dataset::ScanNodeOptions{dataset,
scan_options}})});
ASSERT_OK_AND_ASSIGN(auto actual, acero::DeclarationToTable(declarations));
// Scan node always emits augmented fields so we drop those
- ASSERT_OK_AND_ASSIGN(auto actualMinusAgumented, actual->SelectColumns({0, 1,
2}));
+ ASSERT_OK_AND_ASSIGN(auto actualMinusAugmented, actual->SelectColumns({0, 1,
2}));
auto expected = TableFromJSON(dummy_schema, {R"([
[null, 1, null],
[null, 4, null]
])"});
- AssertTablesEqual(*expected, *actualMinusAgumented,
/*same_chunk_layout=*/false);
+ AssertTablesEqual(*expected, *actualMinusAugmented,
/*same_chunk_layout=*/false);
}
} // namespace dataset
diff --git a/cpp/src/arrow/dataset/subtree_test.cc
b/cpp/src/arrow/dataset/subtree_test.cc
index 75429a5fb7..fc13c20ece 100644
--- a/cpp/src/arrow/dataset/subtree_test.cc
+++ b/cpp/src/arrow/dataset/subtree_test.cc
@@ -133,7 +133,7 @@ void ExpectForestIs(std::vector<FileInfo> infos,
std::vector<PT> expected_roots)
ASSERT_OK(forest.Visit(
[&](Forest::Ref ref) -> Result<bool> {
actual_roots.emplace_back(ref, infos);
- return false; // only vist roots
+ return false; // only visit roots
},
[](Forest::Ref) {}));
diff --git a/cpp/src/arrow/dataset/test_util_internal.h
b/cpp/src/arrow/dataset/test_util_internal.h
index 51d39d532c..de0519afac 100644
--- a/cpp/src/arrow/dataset/test_util_internal.h
+++ b/cpp/src/arrow/dataset/test_util_internal.h
@@ -1257,7 +1257,7 @@ class FileFormatScanNodeMixin : public
FileFormatFixtureMixinV2<FormatHelper>,
int64_t expected_batches() const { return GetParam().num_batches; }
int64_t expected_rows() const { return GetParam().expected_rows(); }
- // Override FileFormatFixtureMixin::GetRandomData to paramterize the #
+ // Override FileFormatFixtureMixin::GetRandomData to parameterize the #
// of batches and rows per batch
std::shared_ptr<RecordBatchReader> GetRandomData(
std::shared_ptr<Schema> schema) override {