westonpace commented on code in PR #12843:
URL: https://github.com/apache/arrow/pull/12843#discussion_r850045973


##########
cpp/src/arrow/compute/exec/tpch_node_test.cc:
##########
@@ -570,6 +606,46 @@ TEST(TpchNode, Region) {
   ASSERT_EQ(num_rows, 5);
 }
 
+TEST(TpchNode, Region) {
+  ASSERT_OK_AND_ASSIGN(auto res, GenerateTable(&TpchGen::Region));
+  VerifyRegion(res);
+}
+
+TEST(TpchNode, AllTables) {
+  constexpr double kScaleFactor = 0.05;
+  constexpr int kNumTables = 8;
+  std::array<TableNodeFn, kNumTables> tables = {
+      &TpchGen::Supplier, &TpchGen::Part,     &TpchGen::PartSupp, 
&TpchGen::Customer,
+      &TpchGen::Orders,   &TpchGen::Lineitem, &TpchGen::Nation,   
&TpchGen::Region,
+  };
+  using VerifyFn = void(const std::vector<ExecBatch>&, double);
+  std::array<VerifyFn*, kNumTables> verify_fns = {
+      &VerifySupplier, &VerifyPart,     &VerifyPartSupp, &VerifyCustomer,
+      &VerifyOrders,   &VerifyLineitem, &VerifyNation,   &VerifyRegion,
+  };
+
+  std::array<AsyncGenerator<util::optional<ExecBatch>>, kNumTables> gens;
+  std::array<std::vector<ExecBatch>, kNumTables> batches;
+  ExecContext ctx(default_memory_pool(), arrow::internal::GetCpuThreadPool());
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<ExecPlan> plan, ExecPlan::Make(&ctx));
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<TpchGen> gen,
+                       TpchGen::Make(plan.get(), kScaleFactor));
+  for (int i = 0; i < kNumTables; i++) {
+    ASSERT_OK(AddTableAndSinkToPlan(*plan, *gen, gens[i], tables[i]));
+  }
+
+  ASSERT_OK(plan->Validate());
+  ASSERT_OK(plan->StartProducing());
+  plan->finished().Wait();

Review Comment:
   ```suggestion
     ASSERT_OK(plan->finished().status());
   ```
   Alternatively, there is `ASSERT_FINISHES_OK(plan->finished())`, but I think 
you might need an extra include for that.



##########
cpp/src/arrow/compute/exec/tpch_node_test.cc:
##########
@@ -333,24 +343,15 @@ void CountModifiedComments(const Datum& d, int* 
good_count, int* bad_count) {
   }
 }
 
-TEST(TpchNode, ScaleFactor) {

Review Comment:
   Having all of these methods move around makes it a little harder to follow 
the changes.  Can we pick an order (e.g. ScaleFactor, each table in 
alphabetical order, AllTables) and stick with it?  Or is there some 
meaning behind this new order?



##########
cpp/src/arrow/compute/exec/tpch_node_test.cc:
##########
@@ -365,18 +366,28 @@ TEST(TpchNode, Supplier) {
   }
   ASSERT_EQ(seen_suppkey.size(), kExpectedRows);
   ASSERT_EQ(num_rows, kExpectedRows);
-  ASSERT_EQ(good_count, 5);
-  ASSERT_EQ(bad_count, 5);
+  ASSERT_EQ(good_count, static_cast<int64_t>(5 * scale_factor));
+  ASSERT_EQ(bad_count, static_cast<int64_t>(5 * scale_factor));
 }
 
-TEST(TpchNode, Part) {
-  ASSERT_OK_AND_ASSIGN(auto res, GenerateTable(&TpchGen::Part));
+TEST(TpchNode, ScaleFactor) {
+  constexpr double kScaleFactor = 0.01;
+  ASSERT_OK_AND_ASSIGN(auto res, GenerateTable(&TpchGen::Supplier, 
kScaleFactor));
+  VerifySupplier(res, kScaleFactor);
+}

Review Comment:
   Since `AllTables` is now testing a different scale factor for all of the 
tables, I think this test is probably redundant.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to