bbende commented on code in PR #8765:
URL: https://github.com/apache/nifi/pull/8765#discussion_r1594115130


##########
nifi-extension-bundles/nifi-github-bundle/nifi-github-extensions/src/main/java/org/apache/nifi/github/GitHubFlowRegistryClient.java:
##########
@@ -0,0 +1,633 @@
+/*
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+package org.apache.nifi.github;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.MapperFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.databind.json.JsonMapper;
+import com.fasterxml.jackson.databind.type.TypeFactory;
+import 
com.fasterxml.jackson.module.jakarta.xmlbind.JakartaXmlBindAnnotationIntrospector;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.ValidationContext;
+import org.apache.nifi.components.ValidationResult;
+import org.apache.nifi.flow.ConnectableComponent;
+import org.apache.nifi.flow.Position;
+import org.apache.nifi.flow.VersionedComponent;
+import org.apache.nifi.flow.VersionedConnection;
+import org.apache.nifi.flow.VersionedFlowCoordinates;
+import org.apache.nifi.flow.VersionedProcessGroup;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.registry.flow.AbstractFlowRegistryClient;
+import org.apache.nifi.registry.flow.BucketLocation;
+import org.apache.nifi.registry.flow.FlowLocation;
+import org.apache.nifi.registry.flow.FlowRegistryBranch;
+import org.apache.nifi.registry.flow.FlowRegistryBucket;
+import org.apache.nifi.registry.flow.FlowRegistryClientConfigurationContext;
+import org.apache.nifi.registry.flow.FlowRegistryException;
+import org.apache.nifi.registry.flow.FlowRegistryPermissions;
+import org.apache.nifi.registry.flow.FlowVersionLocation;
+import org.apache.nifi.registry.flow.RegisterAction;
+import org.apache.nifi.registry.flow.RegisteredFlow;
+import org.apache.nifi.registry.flow.RegisteredFlowSnapshot;
+import org.apache.nifi.registry.flow.RegisteredFlowSnapshotMetadata;
+import org.kohsuke.github.GHCommit;
+import org.kohsuke.github.GHContent;
+import org.kohsuke.github.GHContentUpdateResponse;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.stream.Collectors;
+
+/**
+ * Implementation of {@link org.apache.nifi.registry.flow.FlowRegistryClient} 
that uses GitHub for version controlling flows.
+ */
+public class GitHubFlowRegistryClient extends AbstractFlowRegistryClient {
+
+    static final PropertyDescriptor GITHUB_API_URL = new 
PropertyDescriptor.Builder()
+            .name("GitHub API URL")
+            .description("The URL of the GitHub API")
+            .addValidator(StandardValidators.URL_VALIDATOR)
+            .defaultValue("https://api.github.com/")
+            .required(true)
+            .build();
+
+    static final PropertyDescriptor REPOSITORY_NAME = new 
PropertyDescriptor.Builder()
+            .name("Repository Name")
+            .description("The name of the repository")
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .required(true)
+            .build();
+
+    static final PropertyDescriptor REPOSITORY_OWNER = new 
PropertyDescriptor.Builder()
+            .name("Repository Owner")
+            .description("The owner of the repository")
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .required(true)
+            .build();
+
+    static final PropertyDescriptor REPOSITORY_BRANCH = new 
PropertyDescriptor.Builder()
+            .name("Default Branch")
+            .description("The default branch to use for this client")
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .defaultValue("main")
+            .required(true)
+            .build();
+
+    static final PropertyDescriptor REPOSITORY_PATH = new 
PropertyDescriptor.Builder()
+            .name("Repository Path")
+            .description("The path with in the repository that this client 
will use to store all data. " +
+                    "If left blank, then the root of the repository will be 
used.")
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .required(false)
+            .build();
+
+    static final PropertyDescriptor AUTHENTICATION_TYPE = new 
PropertyDescriptor.Builder()
+            .name("Authentication Type")
+            .description("The type of authentication to use for accessing 
GitHub")
+            .allowableValues(GitHubAuthenticationType.values())
+            .defaultValue(GitHubAuthenticationType.NONE.name())
+            .required(true)
+            .build();
+
+    static final PropertyDescriptor PERSONAL_ACCESS_TOKEN = new 
PropertyDescriptor.Builder()
+            .name("Personal Access Token")
+            .description("The personal access token to use for authentication")
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .required(true)
+            .sensitive(true)
+            .dependsOn(AUTHENTICATION_TYPE, 
GitHubAuthenticationType.PERSONAL_ACCESS_TOKEN.name())
+            .build();
+
+    static final PropertyDescriptor APP_INSTALLATION_TOKEN = new 
PropertyDescriptor.Builder()
+            .name("App Installation Token")
+            .description("The app installation token to use for 
authentication")
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .required(true)
+            .sensitive(true)
+            .dependsOn(AUTHENTICATION_TYPE, 
GitHubAuthenticationType.APP_INSTALLATION_TOKEN.name())
+            .build();
+
+    static final List<PropertyDescriptor> PROPERTY_DESCRIPTORS = List.of(
+            GITHUB_API_URL,
+            REPOSITORY_OWNER,
+            REPOSITORY_NAME,
+            REPOSITORY_BRANCH,
+            REPOSITORY_PATH,
+            AUTHENTICATION_TYPE,
+            PERSONAL_ACCESS_TOKEN,
+            APP_INSTALLATION_TOKEN
+    );
+
+    private static final ObjectMapper OBJECT_MAPPER = JsonMapper.builder()
+            .serializationInclusion(JsonInclude.Include.NON_NULL)
+            
.defaultPropertyInclusion(JsonInclude.Value.construct(JsonInclude.Include.NON_NULL,
 JsonInclude.Include.NON_NULL))
+            .annotationIntrospector(new 
JakartaXmlBindAnnotationIntrospector(TypeFactory.defaultInstance()))
+            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, 
false)
+            .configure(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY, true)
+            .enable(SerializationFeature.INDENT_OUTPUT)
+            .addModule(new VersionedComponentModule())
+            .build();
+
+    static final String DEFAULT_BUCKET_NAME = "default";
+    static final String DEFAULT_BUCKET_KEEP_FILE_PATH = DEFAULT_BUCKET_NAME + 
"/.keep";
+    static final String DEFAULT_BUCKET_KEEP_FILE_CONTENT = "Do Not Delete";
+    static final String DEFAULT_BUCKET_KEEP_FILE_MESSAGE = "Creating Default 
bucket";
+
+    static final String REGISTER_FLOW_COMMENT = "Register Flow";
+    static final String DEREGISTER_FLOW_COMMENT = "Deregister Flow";
+    static final String DEFAULT_FLOW_SNAPSHOT_COMMIT_MESSAGE = "Saving Flow 
Snapshot";
+    static final String SNAPSHOT_FILE_EXTENSION = ".json";
+    static final String SNAPSHOT_FILE_PATH_FORMAT = "%s/%s" + 
SNAPSHOT_FILE_EXTENSION;
+    static final String FLOW_CONTENTS_GROUP_ID = "flow-contents-group";
+
+    private volatile GitHubRepositoryClient repositoryClient;
+    private final AtomicBoolean initialized = new AtomicBoolean(false);
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return PROPERTY_DESCRIPTORS;
+    }
+
+    @Override
+    protected Collection<ValidationResult> customValidate(final 
ValidationContext validationContext) {
+        final List<ValidationResult> results = new 
ArrayList<>(super.customValidate(validationContext));
+
+        final String repoPath = 
validationContext.getProperty(REPOSITORY_PATH).getValue();
+        if (repoPath != null && (repoPath.startsWith("/") || 
repoPath.endsWith("/"))) {
+            results.add(new ValidationResult.Builder()
+                    .subject(REPOSITORY_PATH.getDisplayName())
+                    .valid(false)
+                    .explanation("Path can not start or end with /")
+                    .build());
+        }
+
+        return results;
+    }
+
+    @Override
+    public void onPropertyModified(final PropertyDescriptor descriptor, final 
String oldValue, final String newValue) {
+        super.onPropertyModified(descriptor, oldValue, newValue);
+        synchronized (this) {
+            initialized.set(false);
+        }
+    }
+
+    @Override
+    public boolean isStorageLocationApplicable(final 
FlowRegistryClientConfigurationContext context, final String location) {
+        return false;
+    }
+
+    @Override
+    public boolean isBranchingSupported(final 
FlowRegistryClientConfigurationContext context) {
+        return true;
+    }
+
+    @Override
+    public Set<FlowRegistryBranch> getBranches(final 
FlowRegistryClientConfigurationContext context) throws FlowRegistryException, 
IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+        return repositoryClient.getBranches().stream()
+                .map(branchName -> {
+                    final FlowRegistryBranch flowRegistryBranch = new 
FlowRegistryBranch();
+                    flowRegistryBranch.setName(branchName);
+                    return flowRegistryBranch;
+                }).collect(Collectors.toSet());
+    }
+
+    @Override
+    public FlowRegistryBranch getDefaultBranch(final 
FlowRegistryClientConfigurationContext context) {
+        final FlowRegistryBranch defaultBranch = new FlowRegistryBranch();
+        
defaultBranch.setName(context.getProperty(REPOSITORY_BRANCH).getValue());
+        return defaultBranch;
+    }
+
+    @Override
+    public Set<FlowRegistryBucket> getBuckets(final 
FlowRegistryClientConfigurationContext context, final String branch) throws 
IOException, FlowRegistryException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+        final Set<FlowRegistryBucket> buckets = 
repositoryClient.getDirectoryNames("", branch).stream()
+                .map(this::createFlowRegistryBucket)
+                .collect(Collectors.toSet());
+
+        // if the repository has no top-level directories, then return a 
default bucket entry, this won't exist in the repository until the first time a 
flow is saved to it
+        return buckets.isEmpty() ? 
Set.of(createFlowRegistryBucket(DEFAULT_BUCKET_NAME)) : buckets;
+    }
+
+    @Override
+    public FlowRegistryBucket getBucket(final 
FlowRegistryClientConfigurationContext context, final BucketLocation 
bucketLocation) throws FlowRegistryException, IOException {
+        return createFlowRegistryBucket(bucketLocation.getBucketId());
+    }
+
+    @Override
+    public RegisteredFlow registerFlow(final 
FlowRegistryClientConfigurationContext context, final RegisteredFlow flow) 
throws FlowRegistryException, IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+
+        final String branch = flow.getBranch();
+        final FlowLocation flowLocation = new FlowLocation(branch, 
flow.getBucketIdentifier(), flow.getIdentifier());
+        final String filePath = getSnapshotFilePath(flowLocation);
+
+        final Optional<String> existingFileSha = 
repositoryClient.getContentSha(filePath, branch);
+        if (existingFileSha.isPresent()) {
+            throw new FlowRegistryException("Another flow is already 
registered at [" + filePath + "] on branch [" + branch + "]");
+        }
+
+        // Clear values we don't want in the json stored in GitHub
+        final String originalBucketId = flow.getBucketIdentifier();
+        flow.setBucketIdentifier(null);
+        flow.setBucketName(null);
+        flow.setBranch(null);
+
+        final RegisteredFlowSnapshot flowSnapshot = new 
RegisteredFlowSnapshot();
+        flowSnapshot.setBucket(null);
+        flowSnapshot.setFlow(flow);
+
+        final GitHubCreateContentRequest request = 
GitHubCreateContentRequest.builder()
+                .branch(branch)
+                .path(filePath)
+                .content(OBJECT_MAPPER.writeValueAsString(flowSnapshot))
+                .message(REGISTER_FLOW_COMMENT)
+                .build();
+
+        repositoryClient.createContent(request);
+
+        // Re-populate fields before returning
+        flow.setBucketName(originalBucketId);
+        flow.setBucketIdentifier(originalBucketId);
+        flow.setBranch(branch);
+
+        return flow;
+    }
+
+    @Override
+    public RegisteredFlow deregisterFlow(final 
FlowRegistryClientConfigurationContext context, final FlowLocation 
flowLocation) throws FlowRegistryException, IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+
+        final String branch = flowLocation.getBranch();
+        final String filePath = getSnapshotFilePath(flowLocation);
+        final GHContent deletedSnapshotContent = 
repositoryClient.deleteContent(filePath, DEREGISTER_FLOW_COMMENT, branch);
+
+        final RegisteredFlowSnapshot deletedSnapshot = 
getSnapshot(deletedSnapshotContent.read());
+        updateBucketReferences(deletedSnapshot, flowLocation.getBucketId());
+        return deletedSnapshot.getFlow();
+    }
+
+    @Override
+    public RegisteredFlow getFlow(final FlowRegistryClientConfigurationContext 
context, final FlowLocation flowLocation) throws FlowRegistryException, 
IOException {
+        final String branch = flowLocation.getBranch();
+        final String filePath = getSnapshotFilePath(flowLocation);
+
+        final RegisteredFlowSnapshot existingSnapshot = getSnapshot(filePath, 
branch);
+        populateFlowAndSnapshotMetadata(existingSnapshot, flowLocation);
+        updateBucketReferences(existingSnapshot, flowLocation.getBucketId());
+
+        final RegisteredFlow registeredFlow = existingSnapshot.getFlow();
+        registeredFlow.setBranch(branch);
+        return registeredFlow;
+    }
+
+    @Override
+    public Set<RegisteredFlow> getFlows(final 
FlowRegistryClientConfigurationContext context, final BucketLocation 
bucketLocation) throws IOException, FlowRegistryException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+
+        final String branch = bucketLocation.getBranch();
+        final String bucketId = bucketLocation.getBucketId();
+        final Set<RegisteredFlow> registeredFlows = new LinkedHashSet<>();
+
+        for (final String filename : repositoryClient.getFileNames(bucketId, 
branch)) {
+            if (!filename.endsWith(SNAPSHOT_FILE_EXTENSION)) {
+                continue;
+            }
+
+            final String flowId = filename.replace(SNAPSHOT_FILE_EXTENSION, 
"");
+            final RegisteredFlow registeredFlow = new RegisteredFlow();
+            registeredFlow.setIdentifier(flowId);
+            registeredFlow.setName(flowId);
+            registeredFlow.setBranch(branch);
+            registeredFlow.setBucketIdentifier(bucketId);
+            registeredFlow.setBucketName(bucketId);
+            registeredFlows.add(registeredFlow);
+        }
+
+        return registeredFlows;
+    }
+
+    @Override
+    public RegisteredFlowSnapshot getFlowContents(final 
FlowRegistryClientConfigurationContext context, final FlowVersionLocation 
flowVersionLocation)
+            throws FlowRegistryException, IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+
+        final String version = flowVersionLocation.getVersion();
+        final String filePath = getSnapshotFilePath(flowVersionLocation);
+
+        final InputStream inputStream = 
repositoryClient.getContentFromCommit(filePath, version);
+        final RegisteredFlowSnapshot flowSnapshot = getSnapshot(inputStream);
+        populateFlowAndSnapshotMetadata(flowSnapshot, flowVersionLocation);
+
+        // populate values that aren't store in GitHub
+        flowSnapshot.getSnapshotMetadata().setVersion(version);
+        
flowSnapshot.getSnapshotMetadata().setBranch(flowVersionLocation.getBranch());
+        flowSnapshot.getFlow().setBranch(flowVersionLocation.getBranch());
+
+        // populate outgoing bucket references
+        updateBucketReferences(flowSnapshot, 
flowVersionLocation.getBucketId());
+
+        // determine if the version is the "latest" version by comparing to 
the response of getLatestVersion
+        final String latestVersion = getLatestVersion(context, 
flowVersionLocation).orElse(null);
+        flowSnapshot.setLatest(version.equals(latestVersion));
+
+        return flowSnapshot;
+    }
+
+    @Override
+    public RegisteredFlowSnapshot registerFlowSnapshot(final 
FlowRegistryClientConfigurationContext context, final RegisteredFlowSnapshot 
flowSnapshot, final RegisterAction action)
+            throws FlowRegistryException, IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+        final RegisteredFlowSnapshotMetadata snapshotMetadata = 
flowSnapshot.getSnapshotMetadata();
+
+        final String branch = snapshotMetadata.getBranch();
+        final FlowLocation flowLocation = new 
FlowLocation(snapshotMetadata.getBranch(), 
snapshotMetadata.getBucketIdentifier(), snapshotMetadata.getFlowIdentifier());
+        final String filePath = getSnapshotFilePath(flowLocation);
+        final String previousSha = repositoryClient.getContentSha(filePath, 
branch).orElse(null);
+
+        final String snapshotComments = snapshotMetadata.getComments();
+        final String commitMessage = StringUtils.isBlank(snapshotComments) ? 
DEFAULT_FLOW_SNAPSHOT_COMMIT_MESSAGE : snapshotComments;
+
+        final RegisteredFlowSnapshot existingSnapshot = getSnapshot(filePath, 
branch);
+        populateFlowAndSnapshotMetadata(existingSnapshot, flowLocation);
+
+        final RegisteredFlow existingFlow = existingSnapshot.getFlow();
+        existingFlow.setBranch(null);
+        flowSnapshot.setFlow(existingFlow);
+
+        // Clear values we don't want stored in the json in GitHub
+        flowSnapshot.setBucket(null);
+        flowSnapshot.getSnapshotMetadata().setBucketIdentifier(null);
+        flowSnapshot.getSnapshotMetadata().setBranch(null);
+        flowSnapshot.getSnapshotMetadata().setVersion(null);
+        flowSnapshot.getSnapshotMetadata().setComments(null);
+        flowSnapshot.getSnapshotMetadata().setTimestamp(0);
+
+        // replace the id of the top level group and all of its references 
with a constant value prior to serializing to avoid
+        // unnecessary diffs when different instances of the same flow are 
imported and have different top-level PG ids
+        final String originalFlowContentsGroupId = 
replaceGroupId(flowSnapshot.getFlowContents(), FLOW_CONTENTS_GROUP_ID);
+        final Position originalFlowContentsPosition = 
replacePosition(flowSnapshot.getFlowContents(), new Position(0, 0));
+
+        final GitHubCreateContentRequest createContentRequest = 
GitHubCreateContentRequest.builder()
+                .branch(branch)
+                .path(filePath)
+                .content(OBJECT_MAPPER.writeValueAsString(flowSnapshot))
+                .message(commitMessage)
+                .existingContentSha(previousSha)
+                .build();
+
+        final GHContentUpdateResponse createContentResponse = 
repositoryClient.createContent(createContentRequest);
+        final String createContentCommitSha = 
createContentResponse.getCommit().getSha();
+
+        final VersionedFlowCoordinates versionedFlowCoordinates = new 
VersionedFlowCoordinates();
+        versionedFlowCoordinates.setRegistryId(getIdentifier());
+        versionedFlowCoordinates.setBranch(flowLocation.getBranch());
+        versionedFlowCoordinates.setBucketId(flowLocation.getBucketId());
+        versionedFlowCoordinates.setFlowId(flowLocation.getFlowId());
+        versionedFlowCoordinates.setVersion(createContentCommitSha);
+
+        
flowSnapshot.getFlowContents().setVersionedFlowCoordinates(versionedFlowCoordinates);
+        flowSnapshot.getFlow().setBranch(branch);
+        flowSnapshot.getSnapshotMetadata().setBranch(branch);
+        flowSnapshot.getSnapshotMetadata().setVersion(createContentCommitSha);
+        flowSnapshot.setLatest(true);
+
+        // populate outgoing bucket references
+        updateBucketReferences(flowSnapshot, flowLocation.getBucketId());
+
+        // set back to the original id so that the returned snapshot is has 
the correct values from what was passed in
+        replaceGroupId(flowSnapshot.getFlowContents(), 
originalFlowContentsGroupId);
+        replacePosition(flowSnapshot.getFlowContents(), 
originalFlowContentsPosition);
+
+        return flowSnapshot;
+    }
+
+    @Override
+    public Set<RegisteredFlowSnapshotMetadata> getFlowVersions(final 
FlowRegistryClientConfigurationContext context, final FlowLocation flowLocation)
+            throws FlowRegistryException, IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+
+        final String branch = flowLocation.getBranch();
+        final String filePath = getSnapshotFilePath(flowLocation);
+
+        final Set<RegisteredFlowSnapshotMetadata> snapshotMetadataSet = new 
LinkedHashSet<>();
+        for (final GHCommit ghCommit : repositoryClient.getCommits(filePath, 
branch)) {
+            final RegisteredFlowSnapshotMetadata snapshotMetadata = 
createSnapshotMetadata(ghCommit, flowLocation);
+            if (REGISTER_FLOW_COMMENT.equals(snapshotMetadata.getComments())) {
+                continue;
+            }
+            snapshotMetadataSet.add(snapshotMetadata);
+        }
+        return snapshotMetadataSet;
+    }
+
+    @Override
+    public Optional<String> getLatestVersion(final 
FlowRegistryClientConfigurationContext context, final FlowLocation 
flowLocation) throws FlowRegistryException, IOException {
+        final GitHubRepositoryClient repositoryClient = 
getRepositoryClient(context);
+
+        final String branch = flowLocation.getBranch();
+        final String filePath = getSnapshotFilePath(flowLocation);
+
+        final List<GHCommit> commits = repositoryClient.getCommits(filePath, 
branch);
+        final String latestVersion = commits.isEmpty() ? null : 
commits.getFirst().getSHA1();
+        return Optional.ofNullable(latestVersion);
+    }
+
+    @Override
+    public String generateFlowId(final String flowName) {
+        return flowName.trim()
+                .replaceAll("\\s", "-") // replace whitespace with -
+                .replaceAll("[^a-zA-Z0-9-]", "") // replace all other invalid 
chars with empty string
+                .replaceAll("(-)\\1+", "$1"); // replace consecutive - with 
single -
+    }
+
+    private FlowRegistryBucket createFlowRegistryBucket(final String name) {
+        final FlowRegistryPermissions bucketPermissions = new 
FlowRegistryPermissions();
+        bucketPermissions.setCanRead(true);
+        bucketPermissions.setCanWrite(true);
+        bucketPermissions.setCanDelete(true);
+
+        final FlowRegistryBucket bucket = new FlowRegistryBucket();
+        bucket.setIdentifier(name);
+        bucket.setName(name);
+        bucket.setPermissions(bucketPermissions);
+        return bucket;
+    }
+
+    private RegisteredFlowSnapshotMetadata createSnapshotMetadata(final 
GHCommit ghCommit, final FlowLocation flowLocation) throws IOException {
+        final GHCommit.ShortInfo shortInfo = ghCommit.getCommitShortInfo();
+
+        final RegisteredFlowSnapshotMetadata snapshotMetadata = new 
RegisteredFlowSnapshotMetadata();
+        snapshotMetadata.setBranch(flowLocation.getBranch());
+        snapshotMetadata.setBucketIdentifier(flowLocation.getBucketId());
+        snapshotMetadata.setFlowIdentifier(flowLocation.getFlowId());
+        snapshotMetadata.setVersion(ghCommit.getSHA1());
+        snapshotMetadata.setAuthor(ghCommit.getAuthor().getLogin());
+        snapshotMetadata.setComments(shortInfo.getMessage());
+        snapshotMetadata.setTimestamp(shortInfo.getCommitDate().getTime());
+        return snapshotMetadata;
+    }
+
+    private String getSnapshotFilePath(final FlowLocation flowLocation) {
+        return SNAPSHOT_FILE_PATH_FORMAT.formatted(flowLocation.getBucketId(), 
flowLocation.getFlowId());
+    }
+
+    private RegisteredFlowSnapshot getSnapshot(final String filePath, final 
String branch) throws IOException, FlowRegistryException {
+        try (final InputStream contentInputStream = 
repositoryClient.getContentFromBranch(filePath, branch)) {
+            return OBJECT_MAPPER.readValue(contentInputStream, 
RegisteredFlowSnapshot.class);
+        }
+    }
+
+    private RegisteredFlowSnapshot getSnapshot(final InputStream inputStream) 
throws IOException {
+        try {
+            return OBJECT_MAPPER.readValue(inputStream, 
RegisteredFlowSnapshot.class);
+        } finally {
+            IOUtils.closeQuietly(inputStream);
+        }
+    }
+
+    private Position replacePosition(final VersionedProcessGroup group, final 
Position newPosition) {
+        final Position originalPosition = group.getPosition();
+        group.setPosition(newPosition);
+        return originalPosition;
+    }
+
+    private String replaceGroupId(final VersionedProcessGroup group, final 
String newGroupId) {
+        final String originalGroupId = group.getIdentifier();
+        group.setIdentifier(newGroupId);
+
+        replaceGroupId(group.getProcessGroups(), newGroupId);
+        replaceGroupId(group.getRemoteProcessGroups(), newGroupId);
+        replaceGroupId(group.getProcessors(), newGroupId);
+        replaceGroupId(group.getFunnels(), newGroupId);
+        replaceGroupId(group.getLabels(), newGroupId);
+        replaceGroupId(group.getInputPorts(), newGroupId);
+        replaceGroupId(group.getOutputPorts(), newGroupId);
+        replaceGroupId(group.getControllerServices(), newGroupId);
+        replaceGroupId(group.getConnections(), newGroupId);
+
+        if (group.getConnections() != null) {
+            for (final VersionedConnection connection : 
group.getConnections()) {
+                replaceGroupId(connection.getSource(), originalGroupId, 
newGroupId);
+                replaceGroupId(connection.getDestination(), originalGroupId, 
newGroupId);
+            }
+        }
+
+        return originalGroupId;
+    }
+
+    private <T extends VersionedComponent> void replaceGroupId(final 
Collection<T> components, final String newGroupIdentifier) {
+        if (components == null) {
+            return;
+        }
+        components.forEach(c -> c.setGroupIdentifier(newGroupIdentifier));
+    }
+
+    private void replaceGroupId(final ConnectableComponent 
connectableComponent, final String originalGroupId, final String newGroupId) {
+        if (connectableComponent == null) {
+            return;
+        }
+
+        if (originalGroupId.equals(connectableComponent.getGroupId())) {
+            connectableComponent.setGroupId(newGroupId);
+        }
+    }
+
+    private void updateBucketReferences(final RegisteredFlowSnapshot 
flowSnapshot, final String bucketId) {
+        final FlowRegistryBucket bucket = createFlowRegistryBucket(bucketId);
+        flowSnapshot.setBucket(bucket);
+
+        final RegisteredFlow flow = flowSnapshot.getFlow();
+        flow.setBucketName(bucketId);
+        flow.setBucketIdentifier(bucketId);
+
+        final RegisteredFlowSnapshotMetadata snapshotMetadata = 
flowSnapshot.getSnapshotMetadata();
+        snapshotMetadata.setBucketIdentifier(bucketId);
+    }
+
+    // Ensures the snapshot has non-null flow and metadata fields, which would 
only be null if taking a flow from "Download Flow Definition" and adding 
directly to GitHub
+    private void populateFlowAndSnapshotMetadata(final RegisteredFlowSnapshot 
flowSnapshot, final FlowLocation flowLocation) {
+        if (flowSnapshot.getFlow() == null) {
+            final RegisteredFlow registeredFlow = new RegisteredFlow();
+            registeredFlow.setName(flowLocation.getFlowId());
+            registeredFlow.setIdentifier(flowLocation.getFlowId());
+            flowSnapshot.setFlow(registeredFlow);
+        }
+        if (flowSnapshot.getSnapshotMetadata() == null) {
+            final RegisteredFlowSnapshotMetadata snapshotMetadata = new 
RegisteredFlowSnapshotMetadata();
+            snapshotMetadata.setFlowIdentifier(flowLocation.getFlowId());
+            flowSnapshot.setSnapshotMetadata(snapshotMetadata);
+        }
+    }
+
+    private synchronized GitHubRepositoryClient getRepositoryClient(final 
FlowRegistryClientConfigurationContext context) throws IOException, 
FlowRegistryException {
+        if (!initialized.get()) {
+            getLogger().info("Initializing GitHub repository client");
+            repositoryClient = GitHubRepositoryClient.builder()
+                    .apiUrl(context.getProperty(GITHUB_API_URL).getValue())
+                    
.authenticationType(GitHubAuthenticationType.valueOf(context.getProperty(AUTHENTICATION_TYPE).getValue()))
+                    
.personalAccessToken(context.getProperty(PERSONAL_ACCESS_TOKEN).getValue())
+                    
.appInstallationToken(context.getProperty(APP_INSTALLATION_TOKEN).getValue())
+                    
.repoOwner(context.getProperty(REPOSITORY_OWNER).getValue())
+                    .repoName(context.getProperty(REPOSITORY_NAME).getValue())
+                    .repoPath(context.getProperty(REPOSITORY_PATH).getValue())
+                    .build();
+            initialized.set(true);
+
+            // If the client is configured to authenticate, then ensure the 
directory for the default bucket is present and if not create it,
+            // otherwise the client can only be used to import flows from the 
repo and won't be able to set up the default bucket
+            if (repositoryClient.getAuthenticationType() != 
GitHubAuthenticationType.NONE) {
+                final String branch = 
context.getProperty(REPOSITORY_BRANCH).getValue();
+                final Optional<String> defaultBucketKeepFileSha = 
repositoryClient.getContentSha(DEFAULT_BUCKET_KEEP_FILE_PATH, branch);
+                if (defaultBucketKeepFileSha.isPresent()) {
+                    getLogger().info("Found default bucket with SHA {}", 
defaultBucketKeepFileSha);
+                } else {
+                    getLogger().info("Creating default bucket");
+                    repositoryClient.createContent(

Review Comment:
   I'm not sure I fully agree. I think this is the easiest way for people to 
get things working. Otherwise, they are going to create the client, try to use 
it from NiFi, see no buckets in the list, and be confused about what they 
should do.
   
   The call to create the bucket is only made if the client is configured with 
authentication, which in most cases would mean they have access to the repo. If 
you were connecting to a public repo for importing only, then you wouldn't 
provide any authentication and this call wouldn't happen. I suppose there is an 
in-between case where it's a private repo with read-only access, but I'm not 
sure. Let me think more about this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to