rtpsw commented on code in PR #14041:
URL: https://github.com/apache/arrow/pull/14041#discussion_r966131169
##########
cpp/src/arrow/compute/exec/source_node.cc:
##########
@@ -291,13 +291,194 @@ struct TableSourceNode : public SourceNode {
}
};
+template <typename This, typename Options>
+struct SchemaSourceNode : public SourceNode {
+ SchemaSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,
+ arrow::AsyncGenerator<util::optional<ExecBatch>> generator)
+ : SourceNode(plan, schema, generator) {}
+
+ static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
+ const ExecNodeOptions& options) {
+ RETURN_NOT_OK(ValidateExecNodeInputs(plan, inputs, 0, This::kKindName));
+ const auto& cast_options = checked_cast<const Options&>(options);
+ auto& it_maker = cast_options.it_maker;
+ auto& schema = cast_options.schema;
+
+ auto io_executor = plan->exec_context()->executor();
+ auto it = it_maker();
+
+ RETURN_NOT_OK(ValidateSchemaSourceNodeInput(io_executor, schema,
This::kKindName));
+ ARROW_ASSIGN_OR_RAISE(auto generator, This::MakeGenerator(it, io_executor,
schema));
+ return plan->EmplaceNode<This>(plan, schema, generator);
+ }
+
+ static arrow::Status ValidateSchemaSourceNodeInput(
+ arrow::internal::Executor* io_executor, const std::shared_ptr<Schema>&
schema,
+ const char* kKindName) {
+ if (schema == NULLPTR) {
+ return Status::Invalid(kKindName, " requires schema which is not null");
+ }
+ if (io_executor == NULLPTR) {
+ return Status::Invalid(kKindName, " requires IO-executor which is not
null");
+ }
+
+ return Status::OK();
+ }
+
+ template <typename Item>
+ static Iterator<Enumerated<Item>> MakeEnumeratedIterator(Iterator<Item> it) {
+ // TODO: Should Enumerated<>.index be changed to int64_t? Currently, this
change
+ // causes dataset unit-test failures
Review Comment:
I agree and thought about it too. However, note that `Enumerated` isn't
specific to batches; it can enumerate anything coming out of an iterator,
which might emit more than 2 billion items. I'm fine leaving it `int`, though
I'd prefer `int32_t`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]