lidavidm commented on a change in pull request #10134:
URL: https://github.com/apache/arrow/pull/10134#discussion_r618743453



##########
File path: cpp/src/arrow/dataset/test_util.h
##########
@@ -303,6 +304,316 @@ template <typename P>
 class DatasetFixtureMixinWithParam : public DatasetFixtureMixin,
                                      public ::testing::WithParamInterface<P> 
{};
 
+struct TestScannerParams {
+  bool use_async;
+  bool use_threads;
+  int num_child_datasets;
+  int num_batches;
+  int items_per_batch;
+
+  int64_t total_batches() const { return num_child_datasets * num_batches; }
+
+  int64_t expected_rows() const { return total_batches() * items_per_batch; }
+
+  std::string ToString() const {
+    // GTest requires this to be alphanumeric
+    std::stringstream ss;
+    ss << (use_async ? "Async" : "Sync") << (use_threads ? "Threaded" : 
"Serial")
+       << num_child_datasets << "d" << num_batches << "b" << items_per_batch 
<< "r";
+    return ss.str();
+  }
+
+  static std::string ToTestNameString(
+      const ::testing::TestParamInfo<TestScannerParams>& info) {
+    return std::to_string(info.index) + info.param.ToString();
+  }
+
+  static std::vector<TestScannerParams> Values() {
+    std::vector<TestScannerParams> values;
+    for (int sync = 0; sync < 2; sync++) {
+      for (int use_threads = 0; use_threads < 2; use_threads++) {
+        values.push_back(
+            {static_cast<bool>(sync), static_cast<bool>(use_threads), 1, 1, 
1024});
+        values.push_back(
+            {static_cast<bool>(sync), static_cast<bool>(use_threads), 2, 16, 
1024});
+      }
+    }
+    return values;
+  }
+};
+
+std::ostream& operator<<(std::ostream& out, const TestScannerParams& params) {
+  out << (params.use_async ? "async-" : "sync-")
+      << (params.use_threads ? "threaded-" : "serial-") << 
params.num_child_datasets
+      << "d-" << params.num_batches << "b-" << params.items_per_batch << "i";
+  return out;
+}
+
+class FileFormatWriterMixin {
+  virtual std::shared_ptr<Buffer> Write(RecordBatchReader* reader) = 0;
+  virtual std::shared_ptr<Buffer> Write(const Table& table) = 0;
+};
+
+/// WriterMixin should be a class with these static methods:
+/// std::shared_ptr<Buffer> Write(RecordBatchReader* reader);
+template <typename WriterMixin>
+class FileFormatFixtureMixin : public ::testing::Test {
+ public:
+  constexpr static int64_t kBatchSize = 1UL << 12;
+  constexpr static int64_t kBatchRepetitions = 1 << 5;
+
+  int64_t expected_batches() const { return kBatchRepetitions; }
+  int64_t expected_rows() const { return kBatchSize * kBatchRepetitions; }
+
+  std::shared_ptr<FileSource> GetFileSource(RecordBatchReader* reader) {
+    auto buffer = WriterMixin::Write(reader);
+    return std::make_shared<FileSource>(std::move(buffer));
+  }
+
+  virtual std::shared_ptr<RecordBatchReader> GetRecordBatchReader(
+      std::shared_ptr<Schema> schema) {
+    return MakeGeneratedRecordBatch(schema, kBatchSize, kBatchRepetitions);
+  }
+
+  Result<std::shared_ptr<io::BufferOutputStream>> GetFileSink() {
+    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ResizableBuffer> buffer,
+                          AllocateResizableBuffer(0));
+    return std::make_shared<io::BufferOutputStream>(buffer);
+  }
+
+  void SetSchema(std::vector<std::shared_ptr<Field>> fields) {
+    opts_ = std::make_shared<ScanOptions>();
+    opts_->dataset_schema = schema(std::move(fields));
+    ASSERT_OK(SetProjection(opts_.get(), 
opts_->dataset_schema->field_names()));
+  }
+
+  void SetFilter(Expression filter) {

Review comment:
       Added to ARROW-12311. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to