[
https://issues.apache.org/jira/browse/GOBBLIN-1705?focusedWorklogId=811372&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-811372
]
ASF GitHub Bot logged work on GOBBLIN-1705:
-------------------------------------------
Author: ASF GitHub Bot
Created on: 22/Sep/22 19:30
Start Date: 22/Sep/22 19:30
Worklog Time Spent: 10m
Work Description: umustafi commented on code in PR #3557:
URL: https://github.com/apache/gobblin/pull/3557#discussion_r978013722
##########
gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/SpecStoreChangeMonitor.java:
##########
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gobblin.service.monitoring;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.text.StringEscapeUtils;
+
+import com.codahale.metrics.Meter;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import com.google.inject.Inject;
+import com.typesafe.config.Config;
+
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.kafka.client.DecodeableKafkaRecord;
+import org.apache.gobblin.runtime.api.FlowSpec;
+import org.apache.gobblin.runtime.api.Spec;
+import org.apache.gobblin.runtime.kafka.HighLevelConsumer;
+import org.apache.gobblin.runtime.metrics.RuntimeMetrics;
+import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse;
+import org.apache.gobblin.runtime.spec_catalog.FlowCatalog;
+import org.apache.gobblin.service.modules.scheduler.GobblinServiceJobScheduler;
+
+
+/**
+ * A Flow Spec Store change monitor that uses {@link SpecStoreChangeEvent}
schema to process Kafka messages received
+ * from the consumer service. This monitor responds to changes to flow specs
(creations, updates, deletes) and acts as
+ * a connector between the API and execution layers of GaaS.
+ */
+@Slf4j
+public class SpecStoreChangeMonitor extends HighLevelConsumer {
+ // Config keys: monitor settings are namespaced under this prefix; the topic
+ // name and consumer thread count are read from the keys below.
+ public static final String SPEC_STORE_CHANGE_MONITOR_PREFIX =
"specStoreChangeMonitor";
+ static final String SPEC_STORE_CHANGE_MONITOR_TOPIC_KEY = "topic";
+ static final String SPEC_STORE_CHANGE_MONITOR_NUM_THREADS_KEY = "numThreads";
+
+ // Metrics
+ // NOTE(review): these meters are declared but never initialized in the code
+ // shown here -- presumably created via RuntimeMetrics elsewhere; confirm,
+ // otherwise marking them on failure paths will NPE.
+ private Meter successfullyAddedSpecs;
+ private Meter failedAddedSpecs;
+ private Meter deletedSpecs;
+ private Meter unexpectedErrors;
+
+ // Identity loader: values equal their keys because only key presence
+ // matters -- the cache below is used purely for de-duplication in
+ // processMessage.
+ protected CacheLoader<String, String> cacheLoader = new CacheLoader<String,
String>() {
+ @Override
+ public String load(String key) throws Exception {
+ return key;
+ }
+ };
+
+ // Remembers change identifiers (timestamp + spec URI) for 10 minutes so
+ // duplicate Kafka messages for the same change can be skipped.
+ protected LoadingCache<String, String>
+ specChangesSeenCache = CacheBuilder.newBuilder().expireAfterWrite(10,
TimeUnit.MINUTES).build(cacheLoader);
+
+ // Guice-injected catalog used to load the FlowSpec referenced by a change
+ // event (see flowCatalog.getSpecWrapper in processMessage).
+ @Inject
+ protected FlowCatalog flowCatalog;
+
+ // Guice-injected scheduler the monitor forwards spec-change callbacks to
+ // (e.g. scheduler.onAddSpec in processMessage).
+ @Inject
+ protected GobblinServiceJobScheduler scheduler;
+
+ // Topic, config, and thread-count handling is delegated entirely to
+ // HighLevelConsumer.
+ public SpecStoreChangeMonitor(String topic, Config config, int numThreads) {
+ super(topic, config, numThreads);
+ }
+
+ @Override
+ /*
+ Processes a single Kafka message describing a spec-store change. Note that
+ although this class is multi-threaded and this method may be called from
+ multiple threads (each having a queue associated with it), a given message
+ will be partitioned and assigned to only one queue.
+ */
+ protected void processMessage(DecodeableKafkaRecord message) {
+ String specUri = (String) message.getKey();
+ SpecStoreChangeEvent value = (SpecStoreChangeEvent) message.getValue();
+
+ Long timestamp = value.getTimestamp();
+ String operation = value.getOperationType().name();
+ log.info("Processing message with specUri is {} timestamp is {} operation
is {}", specUri, timestamp, operation);
+
+ // If we've already processed a message with this timestamp and spec uri
before then skip duplicate message
+ // NOTE(review): nothing visible here ever puts changeIdentifier into
+ // specChangesSeenCache -- presumably done after successful processing in
+ // the truncated part of this method; confirm, or dedup never triggers.
+ String changeIdentifier = timestamp.toString() + specUri;
+ if (specChangesSeenCache.getIfPresent(changeIdentifier) != null) {
+ return;
+ }
+
+ // If event is a heartbeat type then log it and skip processing
+ // NOTE(review): == compares String references, not contents. This only
+ // appears to work because Enum.name() typically returns an interned
+ // constant; it is fragile. Use "HEARTBEAT".equals(operation), or better,
+ // compare value.getOperationType() against the enum constant directly.
+ if (operation == "HEARTBEAT") {
+ log.debug("Received heartbeat message from time {}", timestamp);
+ return;
+ }
+
+ Spec spec;
+ URI specAsUri = null;
+
+ try {
+ specAsUri = new URI(specUri);
+ } catch (URISyntaxException e) {
+ // NOTE(review): on failure specAsUri stays null but processing continues,
+ // so getSpecWrapper below may be called with a null URI -- consider
+ // returning here (and counting it via unexpectedErrors) instead.
+ log.warn("Could not create URI object for specUri {} due to error {}",
specUri, e.getMessage());
+ }
+
+ // NOTE(review): same reference-equality problem -- use
+ // !"DELETE".equals(operation).
+ spec = (operation != "DELETE") ?
this.flowCatalog.getSpecWrapper(specAsUri) : null;
+
+ // The monitor should continue to process messages regardless of failures
with individual messages, instead we use
+ // metrics to keep track of failure to process certain SpecStoreChange
events
+ try {
+ // Call respective action for the type of change received
+ AddSpecResponse response;
+ // NOTE(review): same reference-equality problem -- use equals() (or the
+ // enum) for both comparisons.
+ if (operation == "INSERT" || operation == "UPDATE") {
+ response = scheduler.onAddSpec(spec);
+
+ // Null response means the dag failed to compile
+ if (response != null && response.getValue() != null) {
Review Comment:
Good call out — we have an existing check for that which I will use instead:
`isCompileSuccessful`
Issue Time Tracking
-------------------
Worklog Id: (was: 811372)
Time Spent: 3h 20m (was: 3h 10m)
> New Consumer service that processes changes to Flow Spec Store
> --------------------------------------------------------------
>
> Key: GOBBLIN-1705
> URL: https://issues.apache.org/jira/browse/GOBBLIN-1705
> Project: Apache Gobblin
> Issue Type: New Feature
> Components: gobblin-service
> Reporter: Urmi Mustafi
> Assignee: Abhishek Tiwari
> Priority: Major
> Time Spent: 3h 20m
> Remaining Estimate: 0h
>
> In the multi-leader version of GaaS, the REST API layer will not directly
> contact the `GobblinServiceJobScheduler` to respond to API requests. Instead
> after flow level updates are persisted to MySQL, this new monitor will
> subscribe to Kafka events informing it of Flow Spec changes corresponding to
> the API requests and trigger their execution. There will be a similar change
> to follow to respond to other API requests.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)