umustafi commented on code in PR #3557: URL: https://github.com/apache/gobblin/pull/3557#discussion_r976814946
########## gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/SpecStoreChangeMonitor.java: ########## @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.service.monitoring; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.text.StringEscapeUtils; + +import com.codahale.metrics.Meter; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.inject.Inject; +import com.typesafe.config.Config; + +import lombok.extern.slf4j.Slf4j; + +import org.apache.gobblin.kafka.client.DecodeableKafkaRecord; +import org.apache.gobblin.runtime.api.FlowSpec; +import org.apache.gobblin.runtime.api.Spec; +import org.apache.gobblin.runtime.kafka.HighLevelConsumer; +import org.apache.gobblin.runtime.metrics.RuntimeMetrics; +import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse; +import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; +import org.apache.gobblin.service.modules.scheduler.GobblinServiceJobScheduler; + + +/** + * A Flow Spec Store change monitor that uses {@link 
SpecStoreChangeEvent} schema to process Kafka messages received + * from the consumer service. This monitor responds to changes to flow specs (creations, updates, deletes) and acts as + * a connector between the API and execution layers of GaaS. + */ +@Slf4j +public class SpecStoreChangeMonitor extends HighLevelConsumer { + public static final String SPEC_STORE_CHANGE_MONITOR_PREFIX = "specStoreChangeMonitor"; + static final String SPEC_STORE_CHANGE_MONITOR_TOPIC_KEY = "topic"; + static final String SPEC_STORE_CHANGE_MONITOR_NUM_THREADS_KEY = "numThreads"; + + // Metrics + private Meter successfullyAddedSpecs; + private Meter failedAddedSpecs; + private Meter deletedSpecs; + private Meter unexpectedErrors; + + protected CacheLoader<String, String> cacheLoader = new CacheLoader<String, String>() { + @Override + public String load(String key) throws Exception { + return key; + } + }; + + protected LoadingCache<String, String> + specChangesSeenCache = CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.MINUTES).build(cacheLoader); + + @Inject + protected FlowCatalog flowCatalog; + + @Inject + protected GobblinServiceJobScheduler scheduler; + + @Inject + public SpecStoreChangeMonitor(String topic, Config config, int numThreads) { + super(topic, config, numThreads); + } + + @Override + /* + Note that although this class is multi-threaded and will call this message for multiple threads (each having a queue + associated with it), a given message itself will be partitioned and assigned to only one queue. 
+ */ + protected void processMessage(DecodeableKafkaRecord message) { + String specUri = (String) message.getKey(); + SpecStoreChangeEvent value = (SpecStoreChangeEvent) message.getValue(); + + Long timestamp = value.getTimestamp(); + String operation = value.getOperationType().name(); + log.info("Processing message with specUri is {} timestamp is {} operation is {}", specUri, timestamp, operation); + + // If we've already processed a message with this timestamp and spec uri before then skip duplicate message + String changeIdentifier = timestamp.toString() + specUri; + if (specChangesSeenCache.getIfPresent(changeIdentifier) != null) { + return; + } + + // If event is a heartbeat type then log it and skip processing + if (operation == "HEARTBEAT") { + log.info("Received heartbeat message from time {}", timestamp); + return; + } + + Spec spec; + URI specAsUri = null; + + try { + specAsUri = new URI(specUri); + } catch (URISyntaxException e) { + log.warn("Could not create URI object for specUri {} due to error {}", specUri, e.getMessage()); + } + + spec = (operation != "DELETE") ? this.flowCatalog.getSpecWrapper(specAsUri) : null; + + // The monitor should continue to process messages regardless of failures with individual messages, instead we use + // metrics to keep track of failure to process certain SpecStoreChange events + try { + // Call respective action for the type of change received + AddSpecResponse response; + if (operation == "CREATE" || operation == "INSERT") { Review Comment: Ah, you're right. I keep getting the Brooklin [opcodes](https://jarvis.corp.linkedin.com/codesearch/result/?name=BrooklinEnvelopeMetadataConstants.java&path=datastream%2Fdatastream%2Fdatastream-common%2Fsrc%2Fmain%2Fjava%2Fcom%2Flinkedin%2Fdatastream%2Fcommon&reponame=datastream%2Fdatastream) mixed up with the actions we take. The mapping is: INSERT/UPDATE -> create/update spec -> add spec; DELETE -> delete spec. -- This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@gobblin.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org