westonpace commented on a change in pull request #10125:
URL: https://github.com/apache/arrow/pull/10125#discussion_r617741363



##########
File path: cpp/src/arrow/c/abi.h
##########
@@ -98,6 +98,116 @@ struct ArrowArrayStream {
   void* private_data;
 };
 
+// EXPERIMENTAL: C push producer interface
+
+// Consumer-provided callbacks for push producers
+//
+// Rules:
+// - any of these callbacks may be called concurrently from multiple threads
+// - any of these callbacks may call back into
+//   `ArrowArrayProducer::pause_producing`, `ArrowArrayProducer::resume_producing`
+//   or `ArrowArrayProducer::stop_producing`
+//   (but *not* into `ArrowArrayProducer::release`).
+struct ArrowArrayReceiver {
+  // The producer calls this callback to push a data item after it
+  // has filled the pointer-passed ArrowArray struct.
+  //
+  // The consumer *must* move the ArrowArray struct contents before this
+  // callback returns, as the producer is free to release it immediately
+  // afterwards.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_array)(struct ArrowArrayReceiver*, struct ArrowArray* out);
+
+  // The producer calls this callback to signal an error occurred while
+  // producing data.
+  //
+  // `error` is a non-zero `errno`-compatible error code.
+  //
+  // `message` is an optional null-terminated character array describing
+  // the error, or NULL if no description is available.  The `message`
+  // pointer is only valid until this callback returns, therefore the
+  // consumer must copy its contents if it wants to store the error message.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_error)(struct ArrowArrayReceiver*, int error, const char* message);
+
+  // Opaque consumer-specific data.
+  //
+  // This is meant to help the consumer associate calls to the above
+  // callbacks to its internal structures.  If such resources were
+  // dynamically allocated, they should only be released by the consumer
+  // after `ArrowArrayProducer::release` has been called and has returned.
+  void* private_data;
+};
+
+// Push-based array producer
+struct ArrowArrayProducer {
+  // Callback to get the produced data type
+  // (will be the same for all pushed arrays).
+  //
+  // The ArrowSchema must be released independently from the ArrowArrayProducer
+  //
+  // XXX add error return?
+  void (*get_schema)(struct ArrowArrayProducer*, struct ArrowSchema* out);
+
+  // Callback to start producing data
+  //
+  // This function should be called once by the consumer.
+  // It tells the producer that the consumer is ready to be called
+  // on the ArrowArrayReceiver callbacks.
+  //
+  // The ArrowArrayReceiver callbacks may be called *before* this function
+  // returns.  Also, each of the receiver callbacks may be called concurrently,
+  // from multiple threads.
+  void (*start_producing)(struct ArrowArrayProducer*, struct ArrowArrayReceiver*);
+
+  // Callback to temporarily pause producing data
+  //
+  // The consumer can use this function to apply backpressure when it is
+  // not ready to receive more data.  However, the producer may still push
+  // data after this function is called (especially if the producer is
+  // multi-threaded, as ensuring serialization may not be convenient).
+  void (*pause_producing)(struct ArrowArrayProducer*);
+
+  // Callback to resume producing data after a pause
+  //
+  // The consumer can use this function to release backpressure when it is
+  // ready to receive data again.  This must only be called after a
+  // call to `pause_producing`.
+  //
+  // Given that `pause_producing` and `resume_producing` may be called
+  // concurrently, it is recommended to implement their functionality
+  // using a counter or semaphore.
+  void (*resume_producing)(struct ArrowArrayProducer*);
+
+  // Callback to start producing data
+  //
+  // This function should be called once by the consumer, and only
+  // after `start_producing` was called. After this call, the producer
+  // should stop pushing data. However, with multi-threaded producers,
+  // it is still possible for the receiver callbacks to be called after
+  // `stop_producing` has been called.
+  //
+  // This function must be implemented idempotently: calling it a second time
+  // (including concurrently) is a no-op.
+  void (*stop_producing)(struct ArrowArrayProducer*);
+
+  // Release callback: release the push producer's own resources.

Review comment:
       Is the intent for this to be called by the producer when it reaches EOF?
       If not, how does the producer signal that it is finished?

##########
File path: cpp/src/arrow/c/abi.h
##########
@@ -98,6 +98,116 @@ struct ArrowArrayStream {
   void* private_data;
 };
 
+// EXPERIMENTAL: C push producer interface
+
+// Consumer-provided callbacks for push producers
+//
+// Rules:
+// - any of these callbacks may be called concurrently from multiple threads
+// - any of these callbacks may call back into
+//   `ArrowArrayProducer::pause_producing`, `ArrowArrayProducer::resume_producing`
+//   or `ArrowArrayProducer::stop_producing`
+//   (but *not* into `ArrowArrayProducer::release`).
+struct ArrowArrayReceiver {
+  // The producer calls this callback to push a data item after it
+  // has filled the pointer-passed ArrowArray struct.
+  //
+  // The consumer *must* move the ArrowArray struct contents before this
+  // callback returns, as the producer is free to release it immediately
+  // afterwards.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_array)(struct ArrowArrayReceiver*, struct ArrowArray* out);
+
+  // The producer calls this callback to signal an error occurred while
+  // producing data.
+  //
+  // `error` is a non-zero `errno`-compatible error code.
+  //
+  // `message` is an optional null-terminated character array describing
+  // the error, or NULL if no description is available.  The `message`
+  // pointer is only valid until this callback returns, therefore the
+  // consumer must copy its contents if it wants to store the error message.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_error)(struct ArrowArrayReceiver*, int error, const char* message);
+
+  // Opaque consumer-specific data.
+  //
+  // This is meant to help the consumer associate calls to the above
+  // callbacks to its internal structures.  If such resources were
+  // dynamically allocated, they should only be released by the consumer
+  // after `ArrowArrayProducer::release` has been called and has returned.
+  void* private_data;
+};
+
+// Push-based array producer
+struct ArrowArrayProducer {
+  // Callback to get the produced data type
+  // (will be the same for all pushed arrays).
+  //
+  // The ArrowSchema must be released independently from the ArrowArrayProducer
+  //
+  // XXX add error return?
+  void (*get_schema)(struct ArrowArrayProducer*, struct ArrowSchema* out);
+
+  // Callback to start producing data
+  //
+  // This function should be called once by the consumer.
+  // It tells the producer that the consumer is ready to be called
+  // on the ArrowArrayReceiver callbacks.
+  //
+  // The ArrowArrayReceiver callbacks may be called *before* this function
+  // returns.  Also, each of the receiver callbacks may be called concurrently,
+  // from multiple threads.
+  void (*start_producing)(struct ArrowArrayProducer*, struct ArrowArrayReceiver*);
+
+  // Callback to temporarily pause producing data
+  //
+  // The consumer can use this function to apply backpressure when it is
+  // not ready to receive more data.  However, the producer may still push
+  // data after this function is called (especially if the producer is
+  // multi-threaded, as ensuring serialization may not be convenient).
+  void (*pause_producing)(struct ArrowArrayProducer*);
+
+  // Callback to resume producing data after a pause
+  //
+  // The consumer can use this function to release backpressure when it is
+  // ready to receive data again.  This must only be called after a
+  // call to `pause_producing`.
+  //
+  // Given that `pause_producing` and `resume_producing` may be called
+  // concurrently, it is recommended to implement their functionality
+  // using a counter or semaphore.
+  void (*resume_producing)(struct ArrowArrayProducer*);
+
+  // Callback to start producing data

Review comment:
       `start` -> `stop`

##########
File path: cpp/src/arrow/c/abi.h
##########
@@ -98,6 +98,116 @@ struct ArrowArrayStream {
   void* private_data;
 };
 
+// EXPERIMENTAL: C push producer interface
+
+// Consumer-provided callbacks for push producers
+//
+// Rules:
+// - any of these callbacks may be called concurrently from multiple threads
+// - any of these callbacks may call back into
+//   `ArrowArrayProducer::pause_producing`, `ArrowArrayProducer::resume_producing`
+//   or `ArrowArrayProducer::stop_producing`
+//   (but *not* into `ArrowArrayProducer::release`).
+struct ArrowArrayReceiver {
+  // The producer calls this callback to push a data item after it
+  // has filled the pointer-passed ArrowArray struct.
+  //
+  // The consumer *must* move the ArrowArray struct contents before this
+  // callback returns, as the producer is free to release it immediately
+  // afterwards.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_array)(struct ArrowArrayReceiver*, struct ArrowArray* out);
+
+  // The producer calls this callback to signal an error occurred while
+  // producing data.
+  //
+  // `error` is a non-zero `errno`-compatible error code.
+  //
+  // `message` is an optional null-terminated character array describing
+  // the error, or NULL if no description is available.  The `message`
+  // pointer is only valid until this callback returns, therefore the
+  // consumer must copy its contents if it wants to store the error message.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_error)(struct ArrowArrayReceiver*, int error, const char* message);
+
+  // Opaque consumer-specific data.
+  //
+  // This is meant to help the consumer associate calls to the above
+  // callbacks to its internal structures.  If such resources were
+  // dynamically allocated, they should only be released by the consumer
+  // after `ArrowArrayProducer::release` has been called and has returned.
+  void* private_data;
+};
+
+// Push-based array producer
+struct ArrowArrayProducer {
+  // Callback to get the produced data type
+  // (will be the same for all pushed arrays).
+  //
+  // The ArrowSchema must be released independently from the ArrowArrayProducer

Review comment:
       This sentence is a little confusing.  Maybe "The ArrowSchema will be
       released independently" or "The ArrowArrayProducer must allow this
       `ArrowSchema` to be released independently"?  Or maybe I am misunderstanding.

##########
File path: cpp/src/arrow/c/abi.h
##########
@@ -98,6 +98,116 @@ struct ArrowArrayStream {
   void* private_data;
 };
 
+// EXPERIMENTAL: C push producer interface
+
+// Consumer-provided callbacks for push producers
+//
+// Rules:
+// - any of these callbacks may be called concurrently from multiple threads
+// - any of these callbacks may call back into
+//   `ArrowArrayProducer::pause_producing`, `ArrowArrayProducer::resume_producing`
+//   or `ArrowArrayProducer::stop_producing`
+//   (but *not* into `ArrowArrayProducer::release`).
+struct ArrowArrayReceiver {
+  // The producer calls this callback to push a data item after it
+  // has filled the pointer-passed ArrowArray struct.
+  //
+  // The consumer *must* move the ArrowArray struct contents before this
+  // callback returns, as the producer is free to release it immediately
+  // afterwards.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_array)(struct ArrowArrayReceiver*, struct ArrowArray* out);
+
+  // The producer calls this callback to signal an error occurred while
+  // producing data.
+  //
+  // `error` is a non-zero `errno`-compatible error code.
+  //
+  // `message` is an optional null-terminated character array describing
+  // the error, or NULL if no description is available.  The `message`
+  // pointer is only valid until this callback returns, therefore the
+  // consumer must copy its contents if it wants to store the error message.
+  //
+  // This callback is allowed to call back any ArrowArrayProducer callback,
+  // except the `release` callback.
+  void (*receive_error)(struct ArrowArrayReceiver*, int error, const char* message);
+
+  // Opaque consumer-specific data.
+  //
+  // This is meant to help the consumer associate calls to the above
+  // callbacks to its internal structures.  If such resources were
+  // dynamically allocated, they should only be released by the consumer
+  // after `ArrowArrayProducer::release` has been called and has returned.
+  void* private_data;
+};
+
+// Push-based array producer
+struct ArrowArrayProducer {
+  // Callback to get the produced data type
+  // (will be the same for all pushed arrays).
+  //
+  // The ArrowSchema must be released independently from the ArrowArrayProducer
+  //
+  // XXX add error return?
+  void (*get_schema)(struct ArrowArrayProducer*, struct ArrowSchema* out);

Review comment:
       How does this fit into the lifecycle?  Will it be called once before 
`start_producing`?  Or can it be called periodically throughout?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to