Github user MikeThomsen commented on a diff in the pull request:
https://github.com/apache/nifi/pull/2614#discussion_r183234943
--- Diff:
nifi-nar-bundles/nifi-pulsar-bundle/nifi-pulsar-processors/src/main/java/org/apache/nifi/processors/pulsar/pubsub/PublishPulsarRecord_1_X.java
---
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.pulsar.pubsub;
+
+import static
org.apache.nifi.processors.pulsar.pubsub.RecordBasedConst.RECORD_READER;
+import static
org.apache.nifi.processors.pulsar.pubsub.RecordBasedConst.RECORD_WRITER;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.atomic.AtomicLong;
+
+
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processors.pulsar.AbstractPulsarProducerProcessor;
+import org.apache.nifi.schema.access.SchemaNotFoundException;
+import org.apache.nifi.serialization.MalformedRecordException;
+import org.apache.nifi.serialization.RecordReader;
+import org.apache.nifi.serialization.RecordReaderFactory;
+import org.apache.nifi.serialization.RecordSetWriter;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.RecordSet;
+import org.apache.nifi.stream.io.StreamUtils;
+import org.apache.nifi.util.StringUtils;
+import org.apache.pulsar.client.api.MessageId;
+import org.apache.pulsar.client.api.Producer;
+
+@Tags({"Apache", "Pulsar", "Record", "csv", "json", "avro", "logs", "Put",
"Send", "Message", "PubSub", "1.0"})
+@CapabilityDescription("Sends the contents of a FlowFile as individual
records to Apache Pulsar using the Pulsar 1.x client API. "
+ + "The contents of the FlowFile are expected to be record-oriented
data that can be read by the configured Record Reader. "
+ + "The complementary NiFi processor for fetching messages is
ConsumePulsarRecord_1_0.")
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@WritesAttribute(attribute = "msg.count", description = "The number of
messages that were sent to Pulsar for this FlowFile. This attribute is added
only to "
+ + "FlowFiles that are routed to success.")
+@SeeAlso({PublishPulsar_1_X.class, ConsumePulsar_1_X.class,
ConsumePulsarRecord_1_X.class})
+public class PublishPulsarRecord_1_X extends
AbstractPulsarProducerProcessor {
+
+ private static final List<PropertyDescriptor> PROPERTIES;
+ private static final Set<Relationship> RELATIONSHIPS;
+
+ static {
+ final List<PropertyDescriptor> properties = new ArrayList<>();
+ properties.add(PULSAR_CLIENT_SERVICE);
+ properties.add(RECORD_READER);
+ properties.add(RECORD_WRITER);
+ properties.add(TOPIC);
+ properties.add(ASYNC_ENABLED);
+ properties.add(MAX_ASYNC_REQUESTS);
+ properties.add(BATCHING_ENABLED);
+ properties.add(BATCHING_MAX_MESSAGES);
+ properties.add(BATCH_INTERVAL);
+ properties.add(BLOCK_IF_QUEUE_FULL);
+ properties.add(COMPRESSION_TYPE);
+ properties.add(MESSAGE_ROUTING_MODE);
+ properties.add(PENDING_MAX_MESSAGES);
+
+ PROPERTIES = Collections.unmodifiableList(properties);
+
+ final Set<Relationship> relationships = new HashSet<>();
+ relationships.add(REL_SUCCESS);
+ relationships.add(REL_FAILURE);
+ RELATIONSHIPS = Collections.unmodifiableSet(relationships);
+ }
+
    /**
     * Returns the fixed, unmodifiable set of relationships (success and
     * failure) built once in the static initializer.
     *
     * @return the relationships this processor may route FlowFiles to
     */
    @Override
    public Set<Relationship> getRelationships() {
        return RELATIONSHIPS;
    }
+
    /**
     * Returns the fixed, unmodifiable list of supported property descriptors
     * built once in the static initializer; order determines UI display order.
     *
     * @return the property descriptors this processor supports
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return PROPERTIES;
    }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session)
throws ProcessException {
+
+ final FlowFile flowFile = session.get();
+ if (flowFile == null) {
+ return;
+ }
+
+ final String topic =
context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
+
+ if (StringUtils.isBlank(topic)) {
+ getLogger().error("Invalid topic specified {}", new Object[]
{topic});
+ session.transfer(flowFile, REL_FAILURE);
+ return;
+ }
+
+ // Read the contents of the FlowFile into a byte array
+ final byte[] messageContent = new byte[(int) flowFile.getSize()];
--- End diff --
Allocating a single in-memory byte array sized to the entire FlowFile could break in some use cases. For example, I have processed 50 GB text
files using NiFi before. That is not a common use case, but I'd like to know what
others think here.
@bbende @markap14 @alopresto @pvillard31 @mattyb149 can one of you take a
look at this and weigh in on whether there is a better way to handle the input
here to accommodate very large record sets?
---