westonpace commented on code in PR #35860:
URL: https://github.com/apache/arrow/pull/35860#discussion_r1213329783
##########
cpp/src/arrow/dataset/file_base.h:
##########
@@ -463,15 +463,21 @@ struct ARROW_DS_EXPORT FileSystemDatasetWriteOptions {
/// \brief Wraps FileSystemDatasetWriteOptions for consumption as
compute::ExecNodeOptions
class ARROW_DS_EXPORT WriteNodeOptions : public acero::ExecNodeOptions {
public:
- explicit WriteNodeOptions(
- FileSystemDatasetWriteOptions options,
- std::shared_ptr<const KeyValueMetadata> custom_metadata = NULLPTR)
- : write_options(std::move(options)),
custom_metadata(std::move(custom_metadata)) {}
+ explicit WriteNodeOptions(FileSystemDatasetWriteOptions options,
+ std::shared_ptr<Schema> custom_schema = NULLPTR)
+ : write_options(std::move(options)),
custom_schema(std::move(custom_schema)) {}
/// \brief Options to control how to write the dataset
FileSystemDatasetWriteOptions write_options;
- /// \brief Optional metadata to attach to written batches
- std::shared_ptr<const KeyValueMetadata> custom_metadata;
Review Comment:
If it were a new feature, I would argue it's not worth it (a user could
technically use `DeclarationToSchema` to get the output schema of the plan
leading up to the write and then attach custom metadata to that). However,
given that we have already released `custom_metadata`, and I would like Acero's
API to start being stable, I suppose I should set an example. Thanks for the
nudge. I have restored `custom_metadata`.
##########
cpp/src/arrow/dataset/file_base.cc:
##########
@@ -475,16 +475,38 @@ Result<acero::ExecNode*> MakeWriteNode(acero::ExecPlan*
plan,
const WriteNodeOptions write_node_options =
checked_cast<const WriteNodeOptions&>(options);
- const std::shared_ptr<const KeyValueMetadata>& custom_metadata =
- write_node_options.custom_metadata;
+ const std::shared_ptr<Schema>& custom_schema =
write_node_options.custom_schema;
const FileSystemDatasetWriteOptions& write_options =
write_node_options.write_options;
+ const std::shared_ptr<Schema>& input_schema = inputs[0]->output_schema();
+
+ if (custom_schema != nullptr) {
+ if (custom_schema->num_fields() != input_schema->num_fields()) {
+ return Status::Invalid(
+ "The provided custom_schema did not have the same number of fields
as the "
+ "data. The custom schema can only be used to add metadata /
nullability to "
+ "fields and cannot change the type or number of fields.");
+ }
+ for (int field_idx = 0; field_idx < input_schema->num_fields();
field_idx++) {
+ if (!input_schema->field(field_idx)->type()->Equals(
+ custom_schema->field(field_idx)->type())) {
+ return Status::Invalid("The provided custom_schema specified type ",
Review Comment:
Switched.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]