[ https://issues.apache.org/jira/browse/NIFI-3709?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16292658#comment-16292658 ]
ASF GitHub Bot commented on NIFI-3709: -------------------------------------- Github user ijokarumawak commented on a diff in the pull request: https://github.com/apache/nifi/pull/2335#discussion_r157222062 --- Diff: nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/NiFIAtlasHook.java --- @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.atlas; + +import com.sun.jersey.api.client.ClientResponse; +import org.apache.atlas.AtlasServiceException; +import org.apache.atlas.hook.AtlasHook; +import org.apache.atlas.model.instance.AtlasEntity; +import org.apache.atlas.model.instance.AtlasObjectId; +import org.apache.atlas.notification.hook.HookNotification.EntityPartialUpdateRequest; +import org.apache.atlas.notification.hook.HookNotification.HookNotificationMessage; +import org.apache.atlas.typesystem.Referenceable; +import org.apache.atlas.typesystem.persistence.Id; +import org.apache.nifi.atlas.provenance.lineage.LineageContext; +import org.apache.nifi.util.Tuple; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.apache.atlas.notification.hook.HookNotification.HookNotificationType.ENTITY_PARTIAL_UPDATE; +import static org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName; +import static org.apache.nifi.atlas.NiFiTypes.ATTR_GUID; +import static org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS; +import static org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS; +import static org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME; +import static org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME; +import static org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH; + +/** + * This class is not thread-safe as it holds uncommitted notification messages within instance. + * {@link #addMessage(HookNotificationMessage)} and {@link #commitMessages()} should be used serially from a single thread. 
+ */ +public class NiFIAtlasHook extends AtlasHook implements LineageContext { + + public static final String NIFI_USER = "nifi"; + + private static final Logger logger = LoggerFactory.getLogger(NiFIAtlasHook.class); + private static final String CONF_PREFIX = "atlas.hook.nifi."; + private static final String HOOK_NUM_RETRIES = CONF_PREFIX + "numRetries"; + + private final NiFiAtlasClient atlasClient; + + /** + * An index to resolve a qualifiedName from a GUID. + */ + private final Map<String, String> guidToQualifiedName; + /** + * An index to resolve a Referenceable from a typeName::qualifiedName. + */ + private final Map<String, Referenceable> typedQualifiedNameToRef; + + + private static <K, V> Map<K, V> createCache(final int maxSize) { + return new LinkedHashMap<K, V>(maxSize, 0.75f, true) { --- End diff -- Well, it's just a private method to create cache instances within this class. So if different default optimizations are needed, then we can do that here as well without affecting others. > Export NiFi flow dataset lineage to Apache Atlas > ------------------------------------------------ > > Key: NIFI-3709 > URL: https://issues.apache.org/jira/browse/NIFI-3709 > Project: Apache NiFi > Issue Type: Improvement > Components: Extensions > Reporter: Koji Kawamura > Assignee: Koji Kawamura > > While Apache NiFi has provenance and event-level lineage support within its > data flow, Apache Atlas also manages lineage between datasets and the > processes that interact with such data. > It would be beneficial for users who use both NiFi and Atlas if they could > see end-to-end data lineage on the Atlas lineage graph, as some types of datasets > are processed by both NiFi and technologies around Atlas such as Storm, > Falcon or Sqoop. For example, Kafka topics and Hive tables. > In order to make this integration happen, I propose a NiFi reporting task > that analyzes a NiFi flow and then creates DataSet and Process entities in Atlas. 
> The challenge is how to design NiFi flow dataset-level lineage within the Atlas > lineage graph. > If we just add a single NiFi process and connect every DataSet from/to it, it > would be too ambiguous since it won't be clear which part of a NiFi flow > actually interacts with a certain dataset. > But if we put every NiFi processor as an independent process in Atlas, it would > be too granular, too. Also, since we already have detailed event-level lineage in > NiFi, we wouldn't need the same level in Atlas. > If we can group certain processors in a NiFi flow as a process in Atlas, it > would be a nice granularity. -- This message was sent by Atlassian JIRA (v6.4.14#64029)