westonpace commented on code in PR #34060:
URL: https://github.com/apache/arrow/pull/34060#discussion_r1099547050
##########
cpp/src/arrow/testing/generator.h:
##########
@@ -234,4 +236,84 @@ class ARROW_TESTING_EXPORT ConstantArrayGenerator {
ARROW_TESTING_EXPORT
Result<std::shared_ptr<Array>> ScalarVectorToArray(const ScalarVector& scalars);
+namespace gen {
+
+class ARROW_TESTING_EXPORT ArrayGenerator {
+ public:
+ virtual ~ArrayGenerator() = default;
+ virtual Result<std::shared_ptr<Array>> Generate(int64_t num_rows) = 0;
+ virtual std::shared_ptr<DataType> type() const = 0;
+};
+
+class ARROW_TESTING_EXPORT DataGenerator {
+ public:
+ virtual ~DataGenerator() = default;
+ virtual Result<std::shared_ptr<::arrow::RecordBatch>> RecordBatch(int64_t num_rows) = 0;
+ virtual Result<std::vector<std::shared_ptr<::arrow::RecordBatch>>> RecordBatches(
+ int64_t rows_per_batch, int num_batches) = 0;
+#ifdef ARROW_COMPUTE
+ virtual Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) = 0;
+ virtual Result<std::vector<::arrow::compute::ExecBatch>> ExecBatches(
+ int64_t rows_per_batch, int num_batches) = 0;
+#endif
+ virtual Result<std::shared_ptr<::arrow::Table>> Table(int64_t rows_per_chunk,
+ int num_chunks = 1) = 0;
+ virtual std::shared_ptr<::arrow::Schema> Schema() = 0;
+};
+
+// Same as DataGenerator but instead of returning Result an ok status is EXPECT'd
+class ARROW_TESTING_EXPORT GTestDataGenerator {
+ public:
+ virtual ~GTestDataGenerator() = default;
+ virtual std::shared_ptr<::arrow::RecordBatch> RecordBatch(int64_t num_rows) = 0;
+ virtual std::vector<std::shared_ptr<::arrow::RecordBatch>> RecordBatches(
+ int64_t rows_per_batch, int num_batches) = 0;
+#ifdef ARROW_COMPUTE
+ virtual ::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) = 0;
+ virtual std::vector<::arrow::compute::ExecBatch> ExecBatches(int64_t rows_per_batch,
+ int num_batches) = 0;
+#endif
+ virtual std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk,
+ int num_chunks = 1) = 0;
+ virtual std::shared_ptr<::arrow::Schema> Schema() = 0;
+};
+
+struct ARROW_TESTING_EXPORT GeneratorField {
+ std::string name;
+ std::shared_ptr<ArrayGenerator> gen;
+};
+
+ARROW_TESTING_EXPORT std::unique_ptr<DataGenerator> Gen(
+ std::vector<std::shared_ptr<ArrayGenerator>> column_gens);
+ARROW_TESTING_EXPORT std::unique_ptr<DataGenerator> Gen(
+ std::vector<GeneratorField> column_gens);
+// For generating batches with 0 columns (though they can still have length)
+ARROW_TESTING_EXPORT std::unique_ptr<DataGenerator> EmptyGen();
+
+ARROW_TESTING_EXPORT std::unique_ptr<GTestDataGenerator> TestGen(
+ std::vector<std::shared_ptr<ArrayGenerator>> column_gens);
+ARROW_TESTING_EXPORT std::unique_ptr<GTestDataGenerator> TestGen(
+ std::vector<GeneratorField> column_gens);
+// For generating batches with 0 columns (though they can still have length)
+ARROW_TESTING_EXPORT std::unique_ptr<GTestDataGenerator> EmptyTestGen();
Review Comment:
It would be nice to avoid the need for `EmptyGen` too. I'll revisit this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]