fgerlits commented on a change in pull request #1268: URL: https://github.com/apache/nifi-minifi-cpp/pull/1268#discussion_r827886303
########## File path: extensions/gcp/processors/PutGCSObject.h ########## @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <string> +#include <memory> + +#include "core/Processor.h" +#include "core/logging/Logger.h" +#include "core/logging/LoggerConfiguration.h" +#include "../controllerservices/GCPCredentialsControllerService.h" +#include "google/cloud/storage/client.h" +#include "google/cloud/storage/retry_policy.h" +#include "utils/Enum.h" + +namespace org::apache::nifi::minifi::extensions::gcp { + +class PutGCSObject : public core::Processor { + public: + SMART_ENUM(PredefinedAcl, + (AUTHENTICATED_READ, "authenticatedRead"), + (BUCKET_OWNER_FULL_CONTROL, "bucketOwnerFullControl"), + (BUCKET_OWNER_READ_ONLY, "bucketOwnerRead"), + (PRIVATE, "private"), + (PROJECT_PRIVATE, "projectPrivate"), + (PUBLIC_READ_ONLY, "publicRead"), + (PUBLIC_READ_WRITE, "publicReadWrite")); Review comment: I think ``` SMART_ENUM(PredefinedAcl, (AUTHENTICATED_READ, gcs::PredefinedAcl::AuthenticatedRead.value()), // etc ``` would be better. The generated docs, both in PROCESSORS.md and in the manifest, will contain the actual strings. EDIT: I can see that you have a unit test which checks that the values in the two enums are equal. I still think the suggestion above would be better (and then the unit test could be removed), but I'm OK with the code as it is, too. ########## File path: docker/test/integration/steps/steps.py ########## @@ -417,6 +425,25 @@ def step_impl(context): context.test.cluster.enable_splunk_hec_ssl('splunk', dump_certificate(splunk_cert), dump_privatekey(splunk_key), dump_certificate(root_ca_cert)) +@given(u'{processor_one} processor is set up with a GcpCredentialsControllerService to communicate with the Google Cloud storage server') +def step_impl(context, processor_one): + gcp_controller_service = GcpCredentialsControllerService(credentials_location="Use Anonymous credentials") + p1 = context.test.get_node_by_name(processor_one) + p1.controller_services.append(gcp_controller_service) + p1.set_property("GCP Credentials Provider Service", gcp_controller_service.name) + + +@given(u'{processor_one} processor and {processor_two} processor are set up with a GcpCredentialsControllerService to communicate with the Google Cloud storage server') Review comment: As we don't use this `step_impl` at the moment, I would delete it; we can add it later if we need it. If you want to keep it, then please add the articles: ```suggestion @given(u'the {processor_one} processor and the {processor_two} processor are set up with a GcpCredentialsControllerService to communicate with the Google Cloud storage server') ``` ########## File path: extensions/gcp/processors/PutGCSObject.h ########## @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <string> +#include <memory> + +#include "core/Processor.h" +#include "core/logging/Logger.h" +#include "core/logging/LoggerConfiguration.h" +#include "../controllerservices/GCPCredentialsControllerService.h" +#include "google/cloud/storage/client.h" +#include "google/cloud/storage/retry_policy.h" +#include "utils/Enum.h" + +namespace org::apache::nifi::minifi::extensions::gcp { + +class PutGCSObject : public core::Processor { + public: + SMART_ENUM(PredefinedAcl, + (AUTHENTICATED_READ, "authenticatedRead"), + (BUCKET_OWNER_FULL_CONTROL, "bucketOwnerFullControl"), + (BUCKET_OWNER_READ_ONLY, "bucketOwnerRead"), + (PRIVATE, "private"), + (PROJECT_PRIVATE, "projectPrivate"), + (PUBLIC_READ_ONLY, "publicRead"), + (PUBLIC_READ_WRITE, "publicReadWrite")); + + explicit PutGCSObject(const std::string& name, const utils::Identifier& uuid = {}) + : core::Processor(name, uuid) { + } + PutGCSObject(const PutGCSObject&) = delete; + PutGCSObject(PutGCSObject&&) = delete; + PutGCSObject& operator=(const PutGCSObject&) = delete; + PutGCSObject& operator=(PutGCSObject&&) = delete; + ~PutGCSObject() override = default; + + EXTENSIONAPI static const core::Property GCPCredentials; + EXTENSIONAPI static const core::Property Bucket; + EXTENSIONAPI static const core::Property Key; + EXTENSIONAPI static const core::Property NumberOfRetries; + EXTENSIONAPI static const core::Property ContentType; + EXTENSIONAPI static const core::Property MD5Hash; + EXTENSIONAPI static const core::Property Crc32cChecksum; + EXTENSIONAPI static const core::Property EncryptionKey; + EXTENSIONAPI static const core::Property ObjectACL; + EXTENSIONAPI static const core::Property OverwriteObject; + EXTENSIONAPI static const core::Property EndpointOverrideURL; + + EXTENSIONAPI static const core::Relationship Success; + EXTENSIONAPI static const core::Relationship Failure; + + void initialize() override; + void onSchedule(const std::shared_ptr<core::ProcessContext> &context, const std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) override; + void onTrigger(const std::shared_ptr<core::ProcessContext>& context, const std::shared_ptr<core::ProcessSession>& session) override; + + core::annotation::Input getInputRequirement() const override { + return core::annotation::Input::INPUT_REQUIRED; + } + + bool isSingleThreaded() const override { + return true; + } + + protected: + virtual google::cloud::storage::Client getClient(const google::cloud::storage::ClientOptions& options) const; + + google::cloud::storage::RetryPolicyOption::Type retry_policy_ = std::make_shared<google::cloud::storage::LimitedErrorCountRetryPolicy>(6); + google::cloud::storage::EncryptionKey encryption_key_; + + std::shared_ptr<google::cloud::storage::oauth2::Credentials> gcp_credentials_; + std::shared_ptr<core::logging::Logger> logger_ = core::logging::LoggerFactory<PutGCSObject>::getLogger(); Review comment: `getClient()` could actually be `private`, that still allows the mock to override it -- but I don't mind if you want to keep it `protected` But I would make the rest of the fields `private`, with a `protected` accessor for those which the mock needs to access. ########## File path: extensions/gcp/GCPAttributes.h ########## @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "google/cloud/storage/object_metadata.h" +#include "core/FlowFile.h" + +namespace org::apache::nifi::minifi::extensions::gcp { + +constexpr const char* GCS_ERROR_REASON = "gcs.error.reason"; +constexpr const char* GCS_ERROR_DOMAIN = "gcs.error.domain"; +constexpr const char* GCS_BUCKET_ATTR = "gcs.bucket"; +constexpr const char* GCS_OBJECT_NAME_ATTR = "gcs.key"; +constexpr const char* GCS_SIZE_ATTR = "gcs.size"; +constexpr const char* GCS_CRC32C_ATTR = "gcs.crc32c"; +constexpr const char* GCS_MD5_ATTR = "gcs.md5"; +constexpr const char* GCS_OWNER_ENTITY_ATTR = "gcs.owner.entity"; +constexpr const char* GCS_OWNER_ENTITY_ID_ATTR = "gcs.owner.entity.id"; +constexpr const char* GCS_MEDIA_LINK_ATTR = "gcs.media.link"; +constexpr const char* GCS_ETAG_ATTR = "gcs.etag"; +constexpr const char* GCS_GENERATED_ID = "gcs.generated.id"; +constexpr const char* GCS_GENERATION = "gcs.generation"; +constexpr const char* GCS_META_GENERATION = "gcs.metageneration"; +constexpr const char* GCS_STORAGE_CLASS = "gcs.storage.class"; +constexpr const char* GCS_CONTENT_ENCODING_ATTR = "gcs.content.encoding"; +constexpr const char* GCS_CONTENT_LANGUAGE_ATTR = "gcs.content.language"; +constexpr const char* GCS_CONTENT_DISPOSITION_ATTR = "gcs.content.disposition"; +constexpr const char* GCS_CREATE_TIME_ATTR = "gcs.create.time"; +constexpr const char* GCS_DELETE_TIME_ATTR = "gcs.delete.time"; +constexpr const char* GCS_UPDATE_TIME_ATTR = "gcs.update.time"; +constexpr const char* GCS_SELF_LINK_ATTR = "gcs.self.link"; +constexpr const char* GCS_ENCRYPTION_ALGORITHM_ATTR = "gcs.encryption.algorithm"; +constexpr const char* GCS_ENCRYPTION_SHA256_ATTR = "gcs.encryption.sha256"; + +inline void setAttributesFromObjectMetadata(core::FlowFile& flow_file, const ::google::cloud::storage::ObjectMetadata& object_metadata) { + flow_file.setAttribute(GCS_BUCKET_ATTR, object_metadata.bucket()); + flow_file.setAttribute(GCS_OBJECT_NAME_ATTR, object_metadata.name()); + flow_file.setAttribute(GCS_SIZE_ATTR, std::to_string(object_metadata.size())); + flow_file.setAttribute(GCS_CRC32C_ATTR, object_metadata.crc32c()); + flow_file.setAttribute(GCS_MD5_ATTR, object_metadata.md5_hash()); + flow_file.setAttribute(GCS_CONTENT_ENCODING_ATTR, object_metadata.content_encoding()); + flow_file.setAttribute(GCS_CONTENT_LANGUAGE_ATTR, object_metadata.content_language()); + flow_file.setAttribute(GCS_CONTENT_DISPOSITION_ATTR, object_metadata.content_disposition()); + flow_file.setAttribute(GCS_CREATE_TIME_ATTR, std::to_string(object_metadata.time_created().time_since_epoch().count())); + flow_file.setAttribute(GCS_UPDATE_TIME_ATTR, std::to_string(object_metadata.updated().time_since_epoch().count())); + flow_file.setAttribute(GCS_DELETE_TIME_ATTR, std::to_string(object_metadata.time_deleted().time_since_epoch().count())); Review comment: Are these in seconds, milliseconds, or something else? I think we should either write these timestamps in an unambiguous format like iso8601, or add something like ".unix-timestamp.seconds" to their name and put a `duration_cast` here. ########## File path: extensions/gcp/GCPAttributes.h ########## @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "google/cloud/storage/object_metadata.h" +#include "core/FlowFile.h" + +namespace org::apache::nifi::minifi::extensions::gcp { + +constexpr const char* GCS_ERROR_REASON = "gcs.error.reason"; +constexpr const char* GCS_ERROR_DOMAIN = "gcs.error.domain"; +constexpr const char* GCS_BUCKET_ATTR = "gcs.bucket"; +constexpr const char* GCS_OBJECT_NAME_ATTR = "gcs.key"; +constexpr const char* GCS_SIZE_ATTR = "gcs.size"; +constexpr const char* GCS_CRC32C_ATTR = "gcs.crc32c"; +constexpr const char* GCS_MD5_ATTR = "gcs.md5"; +constexpr const char* GCS_OWNER_ENTITY_ATTR = "gcs.owner.entity"; +constexpr const char* GCS_OWNER_ENTITY_ID_ATTR = "gcs.owner.entity.id"; +constexpr const char* GCS_MEDIA_LINK_ATTR = "gcs.media.link"; +constexpr const char* GCS_ETAG_ATTR = "gcs.etag"; +constexpr const char* GCS_GENERATED_ID = "gcs.generated.id"; +constexpr const char* GCS_GENERATION = "gcs.generation"; +constexpr const char* GCS_META_GENERATION = "gcs.metageneration"; +constexpr const char* GCS_STORAGE_CLASS = "gcs.storage.class"; +constexpr const char* GCS_CONTENT_ENCODING_ATTR = "gcs.content.encoding"; +constexpr const char* GCS_CONTENT_LANGUAGE_ATTR = "gcs.content.language"; +constexpr const char* GCS_CONTENT_DISPOSITION_ATTR = "gcs.content.disposition"; +constexpr const char* GCS_CREATE_TIME_ATTR = "gcs.create.time"; +constexpr const char* GCS_DELETE_TIME_ATTR = "gcs.delete.time"; +constexpr const char* GCS_UPDATE_TIME_ATTR = "gcs.update.time"; +constexpr const char* GCS_SELF_LINK_ATTR = "gcs.self.link"; +constexpr const char* GCS_ENCRYPTION_ALGORITHM_ATTR = "gcs.encryption.algorithm"; +constexpr const char* GCS_ENCRYPTION_SHA256_ATTR = "gcs.encryption.sha256"; Review comment: I would call this `gcs.encryption.key.sha256` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
