GitHub user MikeThomsen commented on a diff in the pull request: https://github.com/apache/nifi/pull/2614#discussion_r183235123 --- Diff: nifi-nar-bundles/nifi-pulsar-bundle/nifi-pulsar-processors/src/main/java/org/apache/nifi/processors/pulsar/pubsub/PublishPulsarRecord_1_X.java --- @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.pulsar.pubsub; + +import static org.apache.nifi.processors.pulsar.pubsub.RecordBasedConst.RECORD_READER; +import static org.apache.nifi.processors.pulsar.pubsub.RecordBasedConst.RECORD_WRITER; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.atomic.AtomicLong; + + +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.SeeAlso; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.io.InputStreamCallback; +import org.apache.nifi.processors.pulsar.AbstractPulsarProducerProcessor; +import org.apache.nifi.schema.access.SchemaNotFoundException; +import org.apache.nifi.serialization.MalformedRecordException; +import org.apache.nifi.serialization.RecordReader; +import org.apache.nifi.serialization.RecordReaderFactory; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.RecordSetWriterFactory; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.RecordSet; +import org.apache.nifi.stream.io.StreamUtils; 
+import org.apache.nifi.util.StringUtils; +import org.apache.pulsar.client.api.MessageId; +import org.apache.pulsar.client.api.Producer; + +@Tags({"Apache", "Pulsar", "Record", "csv", "json", "avro", "logs", "Put", "Send", "Message", "PubSub", "1.0"}) +@CapabilityDescription("Sends the contents of a FlowFile as individual records to Apache Pulsar using the Pulsar 1.x client API. " + + "The contents of the FlowFile are expected to be record-oriented data that can be read by the configured Record Reader. " + + "The complementary NiFi processor for fetching messages is ConsumePulsarRecord_1_0.") +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) +@WritesAttribute(attribute = "msg.count", description = "The number of messages that were sent to Pulsar for this FlowFile. This attribute is added only to " + + "FlowFiles that are routed to success.") +@SeeAlso({PublishPulsar_1_X.class, ConsumePulsar_1_X.class, ConsumePulsarRecord_1_X.class}) +public class PublishPulsarRecord_1_X extends AbstractPulsarProducerProcessor { + + private static final List<PropertyDescriptor> PROPERTIES; + private static final Set<Relationship> RELATIONSHIPS; + + static { + final List<PropertyDescriptor> properties = new ArrayList<>(); + properties.add(PULSAR_CLIENT_SERVICE); + properties.add(RECORD_READER); + properties.add(RECORD_WRITER); + properties.add(TOPIC); + properties.add(ASYNC_ENABLED); + properties.add(MAX_ASYNC_REQUESTS); + properties.add(BATCHING_ENABLED); + properties.add(BATCHING_MAX_MESSAGES); + properties.add(BATCH_INTERVAL); + properties.add(BLOCK_IF_QUEUE_FULL); + properties.add(COMPRESSION_TYPE); + properties.add(MESSAGE_ROUTING_MODE); + properties.add(PENDING_MAX_MESSAGES); + + PROPERTIES = Collections.unmodifiableList(properties); + + final Set<Relationship> relationships = new HashSet<>(); + relationships.add(REL_SUCCESS); + relationships.add(REL_FAILURE); + RELATIONSHIPS = Collections.unmodifiableSet(relationships); + } + + @Override + public 
Set<Relationship> getRelationships() { + return RELATIONSHIPS; + } + + @Override + protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return PROPERTIES; + } + + @Override + public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { + + final FlowFile flowFile = session.get(); + if (flowFile == null) { + return; + } + + final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue(); + + if (StringUtils.isBlank(topic)) { + getLogger().error("Invalid topic specified {}", new Object[] {topic}); + session.transfer(flowFile, REL_FAILURE); + return; + } + + // Read the contents of the FlowFile into a byte array + final byte[] messageContent = new byte[(int) flowFile.getSize()]; + session.read(flowFile, new InputStreamCallback() { + @Override + public void process(final InputStream in) throws IOException { + StreamUtils.fillBuffer(in, messageContent, true); + } + }); + + // Nothing to do, so skip this Flow file. 
+ if (messageContent == null || messageContent.length < 1) { + session.transfer(flowFile, REL_SUCCESS); + return; + } + + final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER) + .asControllerService(RecordReaderFactory.class); + + final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER) + .asControllerService(RecordSetWriterFactory.class); + + final Map<String, String> attributes = flowFile.getAttributes(); + final AtomicLong messagesSent = new AtomicLong(0L); + + try { + final InputStream in = new ByteArrayInputStream(messageContent); + final RecordReader reader = readerFactory.createRecordReader(attributes, in, getLogger()); + final RecordSet recordSet = reader.createRecordSet(); + final RecordSchema schema = writerFactory.getSchema(attributes, recordSet.getSchema()); + final Producer producer = getWrappedProducer(topic, context).getProducer(); + + if (context.getProperty(ASYNC_ENABLED).isSet() && context.getProperty(ASYNC_ENABLED).asBoolean()) { + InFlightMessageMonitor bundle = getInFlightMessages(writerFactory, schema, recordSet); + this.sendAsync(producer, session, flowFile, bundle); + handleAsync(bundle, session, flowFile, topic); + } else { + messagesSent.addAndGet(send(producer, writerFactory, schema, recordSet)); + session.putAttribute(flowFile, MSG_COUNT, messagesSent.get() + ""); + session.putAttribute(flowFile, TOPIC_NAME, topic); + session.adjustCounter("Messages Sent", messagesSent.get(), true); + session.getProvenanceReporter().send(flowFile, "Sent " + messagesSent.get() + " records to " + topic ); + session.transfer(flowFile, REL_SUCCESS); + } + } catch (final SchemaNotFoundException | MalformedRecordException | IOException e) { + session.transfer(flowFile, REL_FAILURE); + } + + } + + private int send(final Producer producer, final RecordSetWriterFactory writerFactory, final RecordSchema schema, final RecordSet recordSet) throws IOException, SchemaNotFoundException { + + final ByteArrayOutputStream baos 
= new ByteArrayOutputStream(1024); + + Record record; + int recordCount = 0; + + while ((record = recordSet.next()) != null) { + recordCount++; + baos.reset(); + + try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, baos)) { + writer.write(record); + writer.flush(); + } + + producer.send(baos.toByteArray()); + } + + return recordCount; + } + + private InFlightMessageMonitor getInFlightMessages(RecordSetWriterFactory writerFactory, RecordSchema schema, RecordSet recordSet) throws IOException, SchemaNotFoundException { + ArrayList<byte[]> records = new ArrayList<byte[]>(); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024); + + Record record; + + while ((record = recordSet.next()) != null) { + baos.reset(); + + try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, baos)) { + writer.write(record); + writer.flush(); + } + records.add(baos.toByteArray()); + } + + return new InFlightMessageMonitor(records); + } + + /* Launches all of the async send requests + * + */ + protected void sendAsync(Producer producer, ProcessSession session, FlowFile flowFile, InFlightMessageMonitor monitor) { + + if (monitor == null || monitor.getRecords().isEmpty()) + return; + + for (byte[] record: monitor.getRecords() ) { + try { + + publisherService.submit(new Callable<MessageId>() { + @Override + public MessageId call() throws Exception { + try { + return producer.sendAsync(record).handle((msgId, ex) -> { + if (msgId != null) { + monitor.getSuccessCounter().incrementAndGet(); + return msgId; + } else { + monitor.getFailureCounter().incrementAndGet(); + monitor.getFailures().add(record); + return null; + } + }).get(); + + } catch (final Throwable t) { + // This traps any exceptions thrown while calling the producer.sendAsync() method. 
+ monitor.getFailureCounter().incrementAndGet(); + monitor.getFailures().add(record); + return null; + } finally { + monitor.getLatch().countDown(); + } + } + }); + } catch (final RejectedExecutionException ex) { + // This can happen if the processor is being Unscheduled. + } + } + } + + private void handleAsync(InFlightMessageMonitor monitor, ProcessSession session, FlowFile flowFile, String topic) { + try { + + boolean useOriginalForFailures = false; + monitor.getLatch().await(); + + if (monitor.getSuccessCounter().intValue() > 0) { + session.putAttribute(flowFile, MSG_COUNT, monitor.getSuccessCounter().get() + ""); --- End diff -- This should be `flowFile = session.putAttribute(...)`: `putAttribute` returns the updated FlowFile, so the result needs to be assigned back to `flowFile` rather than discarded.
---