sijie commented on a change in pull request #2998: support mysql binlog sync to
pulsar by canal
URL: https://github.com/apache/pulsar/pull/2998#discussion_r234359049
##########
File path:
pulsar-io/canal/src/main/java/org/apache/pulsar/io/canal/CanalSource.java
##########
@@ -0,0 +1,152 @@
+package org.apache.pulsar.io.canal;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.serializer.SerializerFeature;
+import com.alibaba.otter.canal.client.CanalConnector;
+import com.alibaba.otter.canal.client.CanalConnectors;
+import com.alibaba.otter.canal.protocol.Message;
+import com.alibaba.otter.canal.protocol.FlatMessage;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.pulsar.functions.api.Record;
+import org.apache.pulsar.io.core.PushSource;
+import org.apache.pulsar.io.core.SourceContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
+import org.springframework.util.Assert;
+
+import java.net.InetSocketAddress;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+@Slf4j
+public class CanalSource extends PushSource<byte[]> {
+
+ protected Thread thread = null;
+
+ protected volatile boolean running = false;
+
+ private CanalConnector connector;
+
+ private CanalSourceConfig canalSourceConfig;
+
+ protected Thread.UncaughtExceptionHandler handler = new
Thread.UncaughtExceptionHandler() {
+
+ @Override
+ public void uncaughtException(Thread t, Throwable e) {
+ log.error("parse events has an error", e);
+ }
+ };
+
+ @Override
+ public void open(Map<String, Object> config, SourceContext sourceContext)
throws Exception {
+ canalSourceConfig = CanalSourceConfig.load(config);
+ if (canalSourceConfig.getCluster()) {
+ connector =
CanalConnectors.newClusterConnector(canalSourceConfig.getZkServers(),
+ canalSourceConfig.getDestination(),
canalSourceConfig.getUsername(), canalSourceConfig.getPassword());
+ } else {
+ connector = CanalConnectors.newSingleConnector(
+ new
InetSocketAddress(canalSourceConfig.getSingleHostname(),
canalSourceConfig.getSinglePort()),
+ canalSourceConfig.getDestination(),
canalSourceConfig.getUsername(), canalSourceConfig.getPassword());
+ }
+ log.info("start canal connect");
+ this.start();
+
+ }
+
+    /**
+     * Creates and starts the worker thread that runs {@link #process()}.
+     *
+     * <p>Requires {@code connector} to be set already. {@code running} is
+     * switched to {@code true} before the thread starts, so the loop in
+     * {@code process()} sees it on entry.
+     */
+    protected void start() {
+        Assert.notNull(connector, "connector is null");
+        Thread worker = new Thread(this::process);
+        // Route anything that escapes process() to the logging handler.
+        worker.setUncaughtExceptionHandler(handler);
+        thread = worker;
+        running = true;
+        worker.start();
+    }
+
+    /**
+     * Stops the worker thread and disconnects from canal.
+     *
+     * <p>Does nothing beyond logging when the source is not running. Otherwise it
+     * clears {@code running}, interrupts the worker and waits for it to finish.
+     * The connector is disconnected and the {@code destination} MDC entry is
+     * removed in a {@code finally} block, so the connection is released even
+     * when the wait is interrupted.
+     *
+     * @throws InterruptedException if the calling thread is interrupted while
+     *                              waiting for the worker thread to terminate
+     */
+    @Override
+    public void close() throws InterruptedException {
+        log.info("close canal source");
+        if (!running) {
+            return;
+        }
+        running = false;
+        try {
+            if (thread != null) {
+                thread.interrupt();
+                thread.join();
+            }
+        } finally {
+            // Always release the canal connection; previously an interrupted
+            // join() skipped the disconnect and leaked the connection.
+            if (connector != null) {
+                connector.disconnect();
+            }
+            MDC.remove("destination");
+        }
+    }
+
+ protected void process() {
+ while (running) {
+ try {
+ MDC.put("destination", canalSourceConfig.getDestination());
+ connector.connect();
+ log.info("start canal process");
+ connector.subscribe();
+ while (running) {
+ Message message =
connector.getWithoutAck(canalSourceConfig.getBatchSize());
+ // delete the setRaw in new version of canal-client
+ message.setRaw(false);
+ List<FlatMessage> flatMessages =
FlatMessage.messageConverter(message);
+ long batchId = message.getId();
+ int size = message.getEntries().size();
+ if (batchId == -1 || size == 0) {
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
+ }
+ } else {
+ if (flatMessages != null) {
+ for (FlatMessage flatMessage : flatMessages) {
+ String m = JSON.toJSONString(flatMessage,
SerializerFeature.WriteMapNullValue);
+ consume(new CanalRecord(m.getBytes(),
batchId));
+ }
+ }
+ }
+
+ connector.ack(batchId);
Review comment:
If you ack here, the source only supports at-most-once semantics. You might
consider implementing ack() in CanalRecord instead.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services