rdhabalia commented on a change in pull request #2605: implement topic routing 
on a per record basis
URL: https://github.com/apache/incubator-pulsar/pull/2605#discussion_r218603562
 
 

 ##########
 File path: 
pulsar-functions/instance/src/main/java/org/apache/pulsar/functions/sink/PulsarSink.java
 ##########
 @@ -60,140 +62,166 @@
     private final String fqfn;
 
     private interface PulsarSinkProcessor<T> {
-        void initializeOutputProducer(String outputTopic, Schema<T> schema, 
String fqfn) throws Exception;
 
         TypedMessageBuilder<T> newMessage(Record<T> record) throws Exception;
 
         void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> record) 
throws Exception;
 
-        abstract void close() throws Exception;
+        void close() throws Exception;
     }
 
-    private class PulsarSinkAtMostOnceProcessor implements 
PulsarSinkProcessor<T> {
-        private Producer<T> producer;
+    private abstract class PulsarSinkProcessorBase implements 
PulsarSinkProcessor<T> {
+        protected Map<String, Producer<T>> publishProducers = new 
ConcurrentHashMap<>();
+        protected Schema schema;
 
-        @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> 
schema, String fqfn) throws Exception {
-            this.producer = AbstractOneOuputTopicProducers.createProducer(
-                    client, pulsarSinkConfig.getTopic(), null, schema, fqfn);
+        protected PulsarSinkProcessorBase(Schema schema) {
+            this.schema = schema;
         }
 
-        @Override
-        public TypedMessageBuilder<T> newMessage(Record<T> record) {
-            return producer.newMessage();
+        public <T> Producer<T> createProducer(PulsarClient client, String 
topic, String producerName, Schema<T> schema, String fqfn)
+                throws PulsarClientException {
+            ProducerBuilder<T> builder = client.newProducer(schema)
+                    .blockIfQueueFull(true)
+                    .enableBatching(true)
+                    .batchingMaxPublishDelay(1, TimeUnit.MILLISECONDS)
+                    .compressionType(CompressionType.LZ4)
+                    .hashingScheme(HashingScheme.Murmur3_32Hash) //
+                    .messageRoutingMode(MessageRoutingMode.CustomPartition)
+                    .messageRouter(FunctionResultRouter.of())
+                    .topic(topic);
+            if (producerName != null) {
+                builder.producerName(producerName);
+            }
+
+            return builder
+                    .property("application", "pulsarfunction")
+                    .property("fqfn", fqfn).create();
         }
 
-        @Override
-        public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> 
record) throws Exception {
-            msg.sendAsync();
+        protected Producer<T> getProducer(String destinationTopic) {
+            return getProducer(destinationTopic, null, destinationTopic);
         }
 
-        @Override
-        public void close() throws Exception {
-            if (null != producer) {
+        protected Producer<T> getProducer(String producerId, String 
producerName, String topicName) {
+
+            Producer<T> producer = publishProducers.get(producerId);
+
+            if (producer == null) {
                 try {
-                    producer.close();
+                    Producer<T> newProducer = createProducer(
+                            client,
+                            topicName,
+                            producerName,
+                            schema,
+                            fqfn);
+
+                    Producer<T> existingProducer = 
publishProducers.putIfAbsent(producerId, newProducer);
+
+                    if (existingProducer != null) {
+                        // The value in the map was not updated after the 
concurrent put
+                        newProducer.close();
+                        producer = existingProducer;
+                    } else {
+                        producer = newProducer;
+                    }
+
                 } catch (PulsarClientException e) {
-                    log.warn("Fail to close producer for processor {}", 
pulsarSinkConfig.getTopic(), e);
+                    log.error("Failed to create Producer while doing user 
publish", e);
+                    throw new RuntimeException(e);
                 }
             }
+            return producer;
         }
-    }
-
-    private class PulsarSinkAtLeastOnceProcessor implements 
PulsarSinkProcessor<T> {
-        private Producer<T> producer;
 
         @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> 
schema, String fqfn) throws Exception {
-            this.producer = AbstractOneOuputTopicProducers.createProducer(
-                    client, pulsarSinkConfig.getTopic(), null, schema, fqfn);
+        public void close() throws Exception {
+            List<CompletableFuture<Void>> closeFutures = new 
ArrayList<>(publishProducers.size());
+            for (Map.Entry<String, Producer<T>> entry: 
publishProducers.entrySet()) {
+                String topicId = entry.getKey();
+                Producer<T> producer = entry.getValue();
+                closeFutures.add(producer.closeAsync().exceptionally(throwable 
-> {
+                    log.warn("Fail to close producer for output topic {}", 
topicId, throwable);
+                    return null;
+                }));
+            }
+            try {
+                FutureUtils.result(FutureUtils.collect(closeFutures));
+            } catch (Exception e) {
+                log.warn("Fail to close all the producers", e);
+            }
+        }
+    }
+
+    private class PulsarSinkAtMostOnceProcessor extends 
PulsarSinkProcessorBase {
+        public PulsarSinkAtMostOnceProcessor(Schema schema) {
+            super(schema);
         }
 
         @Override
         public TypedMessageBuilder<T> newMessage(Record<T> record) {
-            return producer.newMessage();
+            if (record.getDestinationTopic().isPresent()) {
+                return 
getProducer(record.getDestinationTopic().get()).newMessage();
+            }
+            return getProducer(pulsarSinkConfig.getTopic()).newMessage();
         }
 
         @Override
         public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> 
record) throws Exception {
-            msg.sendAsync().thenAccept(messageId -> record.ack());
+            msg.sendAsync();
+        }
+    }
+
+    private class PulsarSinkAtLeastOnceProcessor extends 
PulsarSinkAtMostOnceProcessor {
+        public PulsarSinkAtLeastOnceProcessor(Schema schema) {
+            super(schema);
         }
 
         @Override
-        public void close() throws Exception {
-            if (null != producer) {
-                try {
-                    producer.close();
-                } catch (PulsarClientException e) {
-                    log.warn("Fail to close producer for processor {}", 
pulsarSinkConfig.getTopic(), e);
-                }
-            }
+        public void sendOutputMessage(TypedMessageBuilder<T> msg, Record<T> 
record) throws Exception {
+            msg.sendAsync().thenAccept(messageId -> record.ack());
         }
     }
 
-    private class PulsarSinkEffectivelyOnceProcessor implements 
PulsarSinkProcessor<T>, ConsumerEventListener {
+    private class PulsarSinkEffectivelyOnceProcessor extends 
PulsarSinkProcessorBase {
 
-        @Getter(AccessLevel.PACKAGE)
-        protected Producers<T> outputProducer;
 
-        @Override
-        public void initializeOutputProducer(String outputTopic, Schema<T> 
schema, String fqfn) throws Exception {
-            outputProducer = new 
MultiConsumersOneOuputTopicProducers<T>(client, outputTopic, schema, fqfn);
-            outputProducer.initialize();
+        public PulsarSinkEffectivelyOnceProcessor(Schema schema) {
+            super(schema);
         }
 
         @Override
         public TypedMessageBuilder<T> newMessage(Record<T> record) throws 
Exception {
-            // Route message to appropriate partition producer
-            return 
outputProducer.getProducer(record.getPartitionId().get()).newMessage();
+            if (!record.getPartitionId().isPresent()) {
+                throw new RuntimeException("PartitionId needs to be specified 
for every record while in Effectively-once mode");
+            }
+            if (record.getDestinationTopic().isPresent()) {
 
 Review comment:
   may be can reduce some code..
   ```
   String destinationTopic = record.getDestinationTopic().isPresent() ? 
record.getDestinationTopic().get(): pulsarSinkConfig.getTopic();
   return getProducer(destinationTopic,..);
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to