(arrow) branch main updated: GH-38968: [C++] Fix spelling (dataset) (#38969)

jorisvandenbossche Fri, 01 Dec 2023 00:26:13 -0800

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 1b9fe98be6 GH-38968: [C++] Fix spelling (dataset) (#38969)
1b9fe98be6 is described below

commit 1b9fe98be6338d4fae917d271c40261b25118f45
Author: Josh Soref <[email protected]>
AuthorDate: Fri Dec 1 03:25:59 2023 -0500

    GH-38968: [C++] Fix spelling (dataset) (#38969)
    
    
    
    ### Rationale for this change
    
    ### What changes are included in this PR?
    
    Spelling fixes to cpp/src/arrow/dataset/
    
    ### Are these changes tested?
    
    ### Are there any user-facing changes?
    
    * Closes: #38968
    
    Authored-by: Josh Soref <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 cpp/src/arrow/dataset/dataset.h            |  2 +-
 cpp/src/arrow/dataset/file_json.cc         |  4 ++--
 cpp/src/arrow/dataset/file_json_test.cc    |  2 +-
 cpp/src/arrow/dataset/file_parquet.h       |  2 +-
 cpp/src/arrow/dataset/scan_node.cc         |  4 ++--
 cpp/src/arrow/dataset/scanner.h            |  4 ++--
 cpp/src/arrow/dataset/scanner_test.cc      | 10 +++++-----
 cpp/src/arrow/dataset/subtree_test.cc      |  2 +-
 cpp/src/arrow/dataset/test_util_internal.h |  2 +-
 9 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h
index 39936fbd7b..1cdd92d5c4 100644
--- a/cpp/src/arrow/dataset/dataset.h
+++ b/cpp/src/arrow/dataset/dataset.h
@@ -398,7 +398,7 @@ class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this<Dataset> {
   ///
   /// Currently, `executor` is always the same as `internal::GetCPUThreadPool()`,
   /// which means the results from the underlying fragment generator will be
-  /// transfered to the default CPU thread pool. The generator itself is
+  /// transferred to the default CPU thread pool. The generator itself is
   /// offloaded to run on the default IO thread pool.
   virtual Result<FragmentGenerator> GetFragmentsAsyncImpl(
       compute::Expression predicate, arrow::internal::Executor* executor);
diff --git a/cpp/src/arrow/dataset/file_json.cc b/cpp/src/arrow/dataset/file_json.cc
index 6ca8405f03..1d545c3969 100644
--- a/cpp/src/arrow/dataset/file_json.cc
+++ b/cpp/src/arrow/dataset/file_json.cc
@@ -324,8 +324,8 @@ Result<RecordBatchGenerator> MakeBatchGenerator(
     const std::shared_ptr<FileFragment>& file) {
   ARROW_ASSIGN_OR_RAISE(auto future, DoOpenReader(file->source(), format, scan_options));
   auto maybe_reader = future.result();
-  // Defer errors that occured during reader instantiation since they're likely related to
-  // batch-processing.
+  // Defer errors that occurred during reader instantiation since they're likely related
+  // to batch-processing.
   if (!maybe_reader.ok()) {
     return MakeFailingGenerator<std::shared_ptr<RecordBatch>>(maybe_reader.status());
   }
diff --git a/cpp/src/arrow/dataset/file_json_test.cc b/cpp/src/arrow/dataset/file_json_test.cc
index 3b0647d28f..9626e8a550 100644
--- a/cpp/src/arrow/dataset/file_json_test.cc
+++ b/cpp/src/arrow/dataset/file_json_test.cc
@@ -162,7 +162,7 @@ std::shared_ptr<FileSource> ToFileSource(std::string json) {
   return std::make_shared<FileSource>(Buffer::FromString(std::move(json)));
 }
 
-// Mixin for additional JSON-specific tests, compatibile with both format APIs.
+// Mixin for additional JSON-specific tests, compatible with both format APIs.
 template <typename T>
 class JsonScanMixin {
  public:
diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h
index 5132a805bb..f527ce5d70 100644
--- a/cpp/src/arrow/dataset/file_parquet.h
+++ b/cpp/src/arrow/dataset/file_parquet.h
@@ -335,7 +335,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory {
   /// \brief Create a ParquetDatasetFactory from a metadata source.
   ///
   /// Similar to the previous Make definition, but the metadata can be a Buffer
-  /// and the base_path is explicited instead of inferred from the metadata
+  /// and the base_path is explicit instead of inferred from the metadata
   /// path.
   ///
   /// \param[in] metadata source to open the metadata parquet file from
diff --git a/cpp/src/arrow/dataset/scan_node.cc b/cpp/src/arrow/dataset/scan_node.cc
index 5ed6eee5dd..c25c5b70ae 100644
--- a/cpp/src/arrow/dataset/scan_node.cc
+++ b/cpp/src/arrow/dataset/scan_node.cc
@@ -94,7 +94,7 @@ Future<AsyncGenerator<std::shared_ptr<Fragment>>> GetFragments(
 /// fragment on disk actually had a column x, and the value was not 7, then we will prefer
 /// the guarantee in this invalid case.
 ///
-/// Ths next step is to fetch the metadata for the fragment.  For some formats (e.g.
+/// The next step is to fetch the metadata for the fragment.  For some formats (e.g.
 /// CSV) this may be quite simple (get the size of the file).  For other formats (e.g.
 /// parquet) this is more involved and requires reading data.  There is one metadata
 /// io-task per fragment.  The metadata io-task creates an AsyncGenerator<RecordBatch>
@@ -150,7 +150,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode {
     }
 
     if (normalized.filter.call() && normalized.filter.IsBound()) {
-      // There is no easy way to make sure a filter was bound agaisnt the same
+      // There is no easy way to make sure a filter was bound against the same
       // function registry as the one in ctx so we just require it to be unbound
       // FIXME - Do we care if it was bound to a different function registry?
       return Status::Invalid("Scan filter must be unbound");
diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h
index 5479a0d9db..4479158ff2 100644
--- a/cpp/src/arrow/dataset/scanner.h
+++ b/cpp/src/arrow/dataset/scanner.h
@@ -141,7 +141,7 @@ struct ARROW_DS_EXPORT ScanOptions {
 /// Scan-specific options, which can be changed between scans of the same dataset.
 ///
 /// A dataset consists of one or more individual fragments.  A fragment is anything
-/// that is indepedently scannable, often a file.
+/// that is independently scannable, often a file.
 ///
 /// Batches from all fragments will be converted to a single schema. This unified
 /// schema is referred to as the "dataset schema" and is the output schema for
@@ -230,7 +230,7 @@ struct ARROW_DS_EXPORT ScanV2Options : public acero::ExecNodeOptions {
   /// for example, if scanning a parquet file that has batches with 100MiB of data
   /// then the actual readahead will be at least 100MiB
   ///
-  /// Set to 0 to disable readhead.  When disabled, the scanner will read the
+  /// Set to 0 to disable readahead.  When disabled, the scanner will read the
   /// dataset one batch at a time
   ///
   /// This limit applies across all fragments.  If the limit is 32MiB and the
diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc
index cde3a725c4..fccfc80032 100644
--- a/cpp/src/arrow/dataset/scanner_test.cc
+++ b/cpp/src/arrow/dataset/scanner_test.cc
@@ -2591,7 +2591,7 @@ TEST(ScanNode, MinimalEndToEnd) {
   // for now, specify the projection as the full project expression (eventually this can
   // just be a list of materialized field names)
   compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
-  // set the projection such that required project experssion field is included as a
+  // set the projection such that required project expression field is included as a
   // field_ref
   compute::Expression project_expr = field_ref("a");
   options->projection =
@@ -2686,7 +2686,7 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) {
   // for now, specify the projection as the full project expression (eventually this can
   // just be a list of materialized field names)
   compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
-  // set the projection such that required project experssion field is included as a
+  // set the projection such that required project expression field is included as a
   // field_ref
   compute::Expression project_expr = field_ref("a");
   options->projection =
@@ -2778,7 +2778,7 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) {
   // for now, specify the projection as the full project expression (eventually this can
   // just be a list of materialized field names)
   compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)});
-  // set the projection such that required project experssion field is included as a
+  // set the projection such that required project expression field is included as a
   // field_ref
   compute::Expression a = field_ref("a");
   compute::Expression b = field_ref("b");
@@ -2888,12 +2888,12 @@ TEST(ScanNode, OnlyLoadProjectedFields) {
       {acero::Declaration({"scan", dataset::ScanNodeOptions{dataset, scan_options}})});
   ASSERT_OK_AND_ASSIGN(auto actual, acero::DeclarationToTable(declarations));
   // Scan node always emits augmented fields so we drop those
-  ASSERT_OK_AND_ASSIGN(auto actualMinusAgumented, actual->SelectColumns({0, 1, 2}));
+  ASSERT_OK_AND_ASSIGN(auto actualMinusAugmented, actual->SelectColumns({0, 1, 2}));
   auto expected = TableFromJSON(dummy_schema, {R"([
       [null, 1, null],
       [null, 4, null]
   ])"});
-  AssertTablesEqual(*expected, *actualMinusAgumented, /*same_chunk_layout=*/false);
+  AssertTablesEqual(*expected, *actualMinusAugmented, /*same_chunk_layout=*/false);
 }
 
 }  // namespace dataset
diff --git a/cpp/src/arrow/dataset/subtree_test.cc b/cpp/src/arrow/dataset/subtree_test.cc
index 75429a5fb7..fc13c20ece 100644
--- a/cpp/src/arrow/dataset/subtree_test.cc
+++ b/cpp/src/arrow/dataset/subtree_test.cc
@@ -133,7 +133,7 @@ void ExpectForestIs(std::vector<FileInfo> infos, std::vector<PT> expected_roots)
   ASSERT_OK(forest.Visit(
       [&](Forest::Ref ref) -> Result<bool> {
         actual_roots.emplace_back(ref, infos);
-        return false;  // only vist roots
+        return false;  // only visit roots
       },
       [](Forest::Ref) {}));
 
diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h
index 51d39d532c..de0519afac 100644
--- a/cpp/src/arrow/dataset/test_util_internal.h
+++ b/cpp/src/arrow/dataset/test_util_internal.h
@@ -1257,7 +1257,7 @@ class FileFormatScanNodeMixin : public FileFormatFixtureMixinV2<FormatHelper>,
   int64_t expected_batches() const { return GetParam().num_batches; }
   int64_t expected_rows() const { return GetParam().expected_rows(); }
 
-  // Override FileFormatFixtureMixin::GetRandomData to paramterize the #
+  // Override FileFormatFixtureMixin::GetRandomData to parameterize the #
   // of batches and rows per batch
   std::shared_ptr<RecordBatchReader> GetRandomData(
       std::shared_ptr<Schema> schema) override {

(arrow) branch main updated: GH-38968: [C++] Fix spelling (dataset) (#38969)

Reply via email to