fgerlits commented on code in PR #1997: URL: https://github.com/apache/nifi-minifi-cpp/pull/1997#discussion_r2330442724
########## extensions/standard-processors/controllers/XMLRecordSetWriter.cpp: ########## @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "XMLRecordSetWriter.h" + +#include "core/Resource.h" +#include "Exception.h" +#include "utils/TimeUtil.h" + +namespace org::apache::nifi::minifi::standard { + +void XMLRecordSetWriter::onEnable() { + if (auto wrap_elements_of_arrays = magic_enum::enum_cast<WrapElementsOfArraysOptions>(getProperty(WrapElementsOfArrays.name).value_or("No Wrapping"))) { + wrap_elements_of_arrays_ = *wrap_elements_of_arrays; + } else { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Invalid value for Wrap Elements of Arrays property: " + getProperty(WrapElementsOfArrays.name).value_or("")); + } + + array_tag_name_ = getProperty(ArrayTagName.name).value_or(""); + if (array_tag_name_.empty() && + (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyAsWrapper || + wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyForElements)) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Array Tag Name property must be set when Wrap Elements of Arrays is set to Use Property as Wrapper or Use Property for Elements"); + } + + omit_xml_declaration_ = getProperty(OmitXMLDeclaration.name).value_or("false") == "true"; + pretty_print_xml_ = getProperty(PrettyPrintXML.name).value_or("false") == "true"; + + name_of_record_tag_ = getProperty(NameOfRecordTag.name).value_or(""); + if (name_of_record_tag_.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Name of Record Tag property must be set"); + } + + name_of_root_tag_ = getProperty(NameOfRootTag.name).value_or(""); + if (name_of_root_tag_.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Name of Root Tag property must be set"); + } +} + +std::string XMLRecordSetWriter::formatXmlOutput(pugi::xml_document& xml_doc) const { + std::ostringstream xml_string_stream; + uint64_t xml_formatting_flags = 0; Review Comment: we could make this `unsigned int` to start with, so we don't need to narrow it in line 64 ########## CONTROLLERS.md: ########## @@ -351,3 +352,23 @@ In the list below, the names of required properties appear in bold. Any other pr | **Parse XML Attributes** | false | true<br/>false | When 'Schema Access Strategy' is 'Infer Schema' and this property is 'true' then XML attributes are parsed and added to the record as new fields. When the schema is inferred but this property is 'false', XML attributes and their values are ignored. | | Attribute Prefix | | | If this property is set, the name of attributes will be prepended with a prefix when they are added to a record. | | **Expect Records as Array** | false | true<br/>false | This property defines whether the reader expects a FlowFile to consist of a single Record or a series of Records with a "wrapper element". Because XML does not provide for a way to read a series of XML documents from a stream directly, it is common to combine many XML documents by concatenating them and then wrapping the entire XML blob with a "wrapper element". This property dictates whether the reader expects a FlowFile to consist of a single Record or a series of Records with a "wrapper element" that will be ignored. | + + +## XMLRecordSetWriter + +### Description + +Writes a RecordSet to XML. The records are wrapped by a root tag. + +### Properties + +In the list below, the names of required properties appear in bold. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the NiFi Expression Language. + +| Name | Default Value | Allowable Values | Description | +|-----------------------------|---------------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Array Tag Name | | | Name of the tag used by property "Wrap Elements of Arrays" to write arrays | +| **Wrap Elements of Arrays** | No Wrapping | Use Property as Wrapper<br/>Use Property for Elements<br/>No Wrapping | Specifies how the writer wraps elements of fields of type array | +| **Omit XML Declaration** | false | true<br/>false | Specifies whether or not to include XML declaration | +| **Pretty Print XML** | false | true<br/>false | Specifies whether or not the XML should be pretty printed | +| **Name of Record Tag** | | | Specifies the name of the XML record tag wrapping the record fields. | +| **Name of Root Tag** | | | Specifies the name of the XML root tag wrapping the record set. This property has to be defined if the writer is supposed to write multiple records in a single FlowFile. | Review Comment: I don't understand the second sentence: the property is required, so it always has to be defined. ########## extensions/standard-processors/controllers/XMLRecordSetWriter.cpp: ########## @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "XMLRecordSetWriter.h" + +#include "core/Resource.h" +#include "Exception.h" +#include "utils/TimeUtil.h" + +namespace org::apache::nifi::minifi::standard { + +void XMLRecordSetWriter::onEnable() { + if (auto wrap_elements_of_arrays = magic_enum::enum_cast<WrapElementsOfArraysOptions>(getProperty(WrapElementsOfArrays.name).value_or("No Wrapping"))) { + wrap_elements_of_arrays_ = *wrap_elements_of_arrays; + } else { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Invalid value for Wrap Elements of Arrays property: " + getProperty(WrapElementsOfArrays.name).value_or("")); + } + + array_tag_name_ = getProperty(ArrayTagName.name).value_or(""); + if (array_tag_name_.empty() && + (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyAsWrapper || + wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyForElements)) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Array Tag Name property must be set when Wrap Elements of Arrays is set to Use Property as Wrapper or Use Property for Elements"); + } + + omit_xml_declaration_ = getProperty(OmitXMLDeclaration.name).value_or("false") == "true"; + pretty_print_xml_ = getProperty(PrettyPrintXML.name).value_or("false") == "true"; Review Comment: I don't like the fact that if the user manually sets these properties to "True" then we'll silently treat it as "false"; can we do some error checking, eg. with `parsing::parseBool()`? ########## CONTROLLERS.md: ########## @@ -351,3 +352,23 @@ In the list below, the names of required properties appear in bold. Any other pr | **Parse XML Attributes** | false | true<br/>false | When 'Schema Access Strategy' is 'Infer Schema' and this property is 'true' then XML attributes are parsed and added to the record as new fields. When the schema is inferred but this property is 'false', XML attributes and their values are ignored. | | Attribute Prefix | | | If this property is set, the name of attributes will be prepended with a prefix when they are added to a record. | | **Expect Records as Array** | false | true<br/>false | This property defines whether the reader expects a FlowFile to consist of a single Record or a series of Records with a "wrapper element". Because XML does not provide for a way to read a series of XML documents from a stream directly, it is common to combine many XML documents by concatenating them and then wrapping the entire XML blob with a "wrapper element". This property dictates whether the reader expects a FlowFile to consist of a single Record or a series of Records with a "wrapper element" that will be ignored. | + + +## XMLRecordSetWriter + +### Description + +Writes a RecordSet to XML. The records are wrapped by a root tag. + +### Properties + +In the list below, the names of required properties appear in bold. Any other properties (not in bold) are considered optional. The table also indicates any default values, and whether a property supports the NiFi Expression Language. + +| Name | Default Value | Allowable Values | Description | +|-----------------------------|---------------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Array Tag Name | | | Name of the tag used by property "Wrap Elements of Arrays" to write arrays | +| **Wrap Elements of Arrays** | No Wrapping | Use Property as Wrapper<br/>Use Property for Elements<br/>No Wrapping | Specifies how the writer wraps elements of fields of type array | Review Comment: NiFi has an explanation of each of the options; I think that would be useful here, too, because the difference between "Use Property as Wrapper" and "Use Property for Elements" is not obvious. In the long term, it would be nice to have a way to attach descriptions to each allowable value (like NiFi does), but for now we can put this in the Description. ########## extensions/standard-processors/controllers/XMLRecordSetWriter.cpp: ########## @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "XMLRecordSetWriter.h" + +#include "core/Resource.h" +#include "Exception.h" +#include "utils/TimeUtil.h" + +namespace org::apache::nifi::minifi::standard { + +void XMLRecordSetWriter::onEnable() { + if (auto wrap_elements_of_arrays = magic_enum::enum_cast<WrapElementsOfArraysOptions>(getProperty(WrapElementsOfArrays.name).value_or("No Wrapping"))) { + wrap_elements_of_arrays_ = *wrap_elements_of_arrays; + } else { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Invalid value for Wrap Elements of Arrays property: " + getProperty(WrapElementsOfArrays.name).value_or("")); + } + + array_tag_name_ = getProperty(ArrayTagName.name).value_or(""); + if (array_tag_name_.empty() && + (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyAsWrapper || + wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyForElements)) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Array Tag Name property must be set when Wrap Elements of Arrays is set to Use Property as Wrapper or Use Property for Elements"); + } + + omit_xml_declaration_ = getProperty(OmitXMLDeclaration.name).value_or("false") == "true"; + pretty_print_xml_ = getProperty(PrettyPrintXML.name).value_or("false") == "true"; + + name_of_record_tag_ = getProperty(NameOfRecordTag.name).value_or(""); + if (name_of_record_tag_.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Name of Record Tag property must be set"); + } + + name_of_root_tag_ = getProperty(NameOfRootTag.name).value_or(""); + if (name_of_root_tag_.empty()) { + throw Exception(PROCESS_SCHEDULE_EXCEPTION, "Name of Root Tag property must be set"); + } +} + +std::string XMLRecordSetWriter::formatXmlOutput(pugi::xml_document& xml_doc) const { + std::ostringstream xml_string_stream; + uint64_t xml_formatting_flags = 0; + if (pretty_print_xml_) { + xml_formatting_flags |= pugi::format_indent; + } else { + xml_formatting_flags |= pugi::format_raw; + } + if (omit_xml_declaration_) { + xml_formatting_flags |= pugi::format_no_declaration; + } + xml_doc.save(xml_string_stream, " ", gsl::narrow<unsigned int>(xml_formatting_flags)); + return xml_string_stream.str(); +} + +void XMLRecordSetWriter::convertRecordArrayField(const std::string& field_name, const core::RecordField& field, pugi::xml_node& parent_node) const { + const auto& record_array = std::get<core::RecordArray>(field.value_); + pugi::xml_node array_node; + if (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyAsWrapper) { + array_node = parent_node.append_child(array_tag_name_.c_str()); + } else if (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyForElements) { + array_node = parent_node.append_child(field_name.c_str()); + } + for (const auto& array_field : record_array) { + if (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyAsWrapper) { + convertRecordField(field_name, array_field, array_node); + } else if (wrap_elements_of_arrays_ == WrapElementsOfArraysOptions::UsePropertyForElements) { + convertRecordField(array_tag_name_, array_field, array_node); + } else { + convertRecordField(field_name, array_field, parent_node); + } + } +} + +void XMLRecordSetWriter::convertRecordField(const std::string& field_name, const core::RecordField& field, pugi::xml_node& parent_node) const { + if (std::holds_alternative<core::RecordArray>(field.value_)) { + convertRecordArrayField(field_name, field, parent_node); + return; + } + + pugi::xml_node field_node = parent_node.append_child(field_name.c_str()); + if (std::holds_alternative<std::string>(field.value_)) { + field_node.text().set(std::get<std::string>(field.value_)); + } else if (std::holds_alternative<int64_t>(field.value_)) { + field_node.text().set(std::to_string(std::get<int64_t>(field.value_)).c_str()); + } else if (std::holds_alternative<uint64_t>(field.value_)) { + field_node.text().set(std::to_string(std::get<uint64_t>(field.value_)).c_str()); + } else if (std::holds_alternative<double>(field.value_)) { + field_node.text().set(fmt::format("{:g}", std::get<double>(field.value_)).c_str()); + } else if (std::holds_alternative<bool>(field.value_)) { + field_node.text().set(std::get<bool>(field.value_) ? "true" : "false"); + } else if (std::holds_alternative<std::chrono::system_clock::time_point>(field.value_)) { + auto time_point = std::get<std::chrono::system_clock::time_point>(field.value_); + auto time_str = utils::timeutils::getDateTimeStr(std::chrono::time_point_cast<std::chrono::seconds>(time_point)); + field_node.text().set(time_str.c_str()); + } else if (std::holds_alternative<core::RecordObject>(field.value_)) { + const auto& record_object = std::get<core::RecordObject>(field.value_); + for (const auto& [obj_key, obj_field] : record_object) { + convertRecordField(obj_key, obj_field, field_node); + } + } Review Comment: I think a visitor pattern like in `RecordField::toJson()` would be nicer. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
