This is an automated email from the ASF dual-hosted git repository.

exceptionfactory pushed a commit to branch support/nifi-1.x
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/support/nifi-1.x by this push:
     new 2e317e02a6 NIFI-11911 Updated FetchGoogleDrive to support Export Types
2e317e02a6 is described below

commit 2e317e02a6c9c931c3914c5ab1079b2a727f14fc
Author: Mark Payne <[email protected]>
AuthorDate: Thu Aug 3 18:49:35 2023 -0400

    NIFI-11911 Updated FetchGoogleDrive to support Export Types
    
    - Export API support includes Google Docs, Presentations, Spreadsheets, Drawings, and AppScripts
    
    This closes #7575
    
    Signed-off-by: David Handermann <[email protected]>
    (cherry picked from commit 485112e54f93d7921c33849a943c93b973c20b5c)
---
 .../processors/gcp/drive/FetchGoogleDrive.java     | 240 +++++++++++++++++----
 1 file changed, 201 insertions(+), 39 deletions(-)

diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
index 2f18170758..2aca33f02f 100644
--- a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
+++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
@@ -16,32 +16,10 @@
  */
 package org.apache.nifi.processors.gcp.drive;
 
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE_DESC;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE_DESC;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME_DESC;
-import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC;
-import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP;
-import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC;
-
 import com.google.api.client.googleapis.json.GoogleJsonResponseException;
 import com.google.api.services.drive.Drive;
 import com.google.api.services.drive.DriveScopes;
 import com.google.api.services.drive.model.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
 import org.apache.nifi.annotation.behavior.InputRequirement;
 import org.apache.nifi.annotation.behavior.ReadsAttribute;
 import org.apache.nifi.annotation.behavior.WritesAttribute;
@@ -50,9 +28,11 @@ import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.SeeAlso;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.AllowableValue;
 import org.apache.nifi.components.PropertyDescriptor;
 import org.apache.nifi.expression.ExpressionLanguageScope;
 import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
 import org.apache.nifi.processor.AbstractProcessor;
 import org.apache.nifi.processor.ProcessContext;
 import org.apache.nifi.processor.ProcessSession;
@@ -63,6 +43,30 @@ import org.apache.nifi.processors.gcp.ProxyAwareTransportFactory;
 import org.apache.nifi.processors.gcp.util.GoogleUtils;
 import org.apache.nifi.proxy.ProxyConfiguration;
 
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE_DESC;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE_DESC;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME_DESC;
+import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC;
+import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP;
+import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC;
+
 @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
 @Tags({"google", "drive", "storage", "fetch"})
 @CapabilityDescription("Fetches files from a Google Drive Folder. Designed to 
be used in tandem with ListGoogleDrive. " +
@@ -80,6 +84,62 @@ import org.apache.nifi.proxy.ProxyConfiguration;
 })
 public class FetchGoogleDrive extends AbstractProcessor implements 
GoogleDriveTrait {
 
+    // Google Docs Export Types
+    private static final AllowableValue EXPORT_MS_WORD = new 
AllowableValue("application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 "Microsoft Word");
+    private static final AllowableValue EXPORT_OPEN_DOCUMENT = new 
AllowableValue("application/vnd.oasis.opendocument.text", "OpenDocument");
+    private static final AllowableValue EXPORT_PDF = new 
AllowableValue("application/pdf", "PDF");
+    private static final AllowableValue EXPORT_RICH_TEXT = new 
AllowableValue("application/rtf", "Rich Text");
+    private static final AllowableValue EXPORT_EPUB = new 
AllowableValue("application/epub+zip", "EPUB");
+
+    // Shared Export Types
+    private static final AllowableValue EXPORT_HTML_DOC = new 
AllowableValue("application/zip", "Web Page (HTML)");
+    private static final AllowableValue EXPORT_PLAIN_TEXT = new 
AllowableValue("text/plain", "Plain Text");
+
+    // Google Spreadsheet Export Types
+    private static final AllowableValue EXPORT_MS_EXCEL = new 
AllowableValue("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
 "Microsoft Excel");
+    private static final AllowableValue EXPORT_OPEN_SPREADSHEET = new 
AllowableValue("application/x-vnd.oasis.opendocument.spreadsheet", 
"OpenDocument Spreadsheet");
+    private static final AllowableValue EXPORT_PDF_SPREADSHEET = new 
AllowableValue("application/pdf", "PDF");
+    private static final AllowableValue EXPORT_CSV = new 
AllowableValue("text/csv", "CSV (first sheet only)",
+        "Comma-separated values. Only the first sheet will be exported.");
+    private static final AllowableValue EXPORT_TSV = new 
AllowableValue("text/tab-separated-values", "TSV (first sheet only)",
+        "Tab-separate values. Only the first sheet will be exported.");
+    private static final AllowableValue EXPORT_HTML_SPREADSHEET = new 
AllowableValue("text/html", "Web Page (HTML)");
+
+    // Google Presentation Export Types
+    private static final AllowableValue EXPORT_MS_POWERPOINT = new 
AllowableValue("application/vnd.openxmlformats-officedocument.presentationml.presentation",
 "Microsoft PowerPoint");
+    private static final AllowableValue EXPORT_OPEN_PRESENTATION = new 
AllowableValue("application/vnd.oasis.opendocument.presentation", "OpenDocument 
Presentation");
+    private static final AllowableValue EXPORT_PNG = new 
AllowableValue("image/png", "PNG (first slide only)");
+    private static final AllowableValue EXPORT_JPEG = new 
AllowableValue("image/jpeg", "JPEG (first slide only)");
+    private static final AllowableValue EXPORT_SVG = new 
AllowableValue("image/svg+xml", "SVG (first slide only)",
+        "Scalable Vector Graphics. Only the first slide will be exported.");
+
+    // Drawings Export Types
+    private static final AllowableValue EXPORT_PNG_DRAWING = new 
AllowableValue("image/png", "PNG");
+    private static final AllowableValue EXPORT_JPEG_DRAWING = new 
AllowableValue("image/jpeg", "JPEG");
+    private static final AllowableValue EXPORT_SVG_DRAWING = new 
AllowableValue("image/svg+xml", "SVG");
+
+    private static final Map<String, String> fileExtensions = new HashMap<>();
+    static {
+        fileExtensions.put(EXPORT_MS_WORD.getValue(), ".docx");
+        fileExtensions.put(EXPORT_OPEN_DOCUMENT.getValue(), ".odt");
+        fileExtensions.put(EXPORT_PDF.getValue(), ".pdf");
+        fileExtensions.put(EXPORT_RICH_TEXT.getValue(), ".rtf");
+        fileExtensions.put(EXPORT_EPUB.getValue(), ".epub");
+        fileExtensions.put(EXPORT_HTML_DOC.getValue(), ".zip");
+        fileExtensions.put(EXPORT_PLAIN_TEXT.getValue(), ".txt");
+        fileExtensions.put(EXPORT_MS_EXCEL.getValue(), ".xlsx");
+        fileExtensions.put(EXPORT_OPEN_SPREADSHEET.getValue(), ".ods");
+        fileExtensions.put(EXPORT_CSV.getValue(), ".csv");
+        fileExtensions.put(EXPORT_TSV.getValue(), ".tsv");
+        fileExtensions.put(EXPORT_MS_POWERPOINT.getValue(), ".pptx");
+        fileExtensions.put(EXPORT_OPEN_PRESENTATION.getValue(), ".odp");
+        fileExtensions.put(EXPORT_PNG.getValue(), ".png");
+        fileExtensions.put(EXPORT_JPEG.getValue(), ".jpg");
+        fileExtensions.put(EXPORT_SVG.getValue(), ".svg");
+        fileExtensions.put("application/vnd.google-apps.script+json", ".json");
+    }
+
+
     public static final PropertyDescriptor FILE_ID = new PropertyDescriptor
             .Builder().name("drive-file-id")
             .displayName("File ID")
@@ -91,6 +151,53 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr
             .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
             .build();
 
+    public static final PropertyDescriptor GOOGLE_DOC_EXPORT_TYPE = new 
PropertyDescriptor.Builder()
+        .name("Google Doc Export Type")
+        .description("Google Documents cannot be downloaded directly from 
Google Drive but instead must be exported to a specified MIME Type. In the 
event " +
+            "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Document, this property specifies the MIME Type to export the document 
to.")
+        .required(true)
+        .allowableValues(
+            EXPORT_PDF, EXPORT_PLAIN_TEXT, EXPORT_MS_WORD,
+            EXPORT_OPEN_DOCUMENT, EXPORT_RICH_TEXT, EXPORT_HTML_DOC, 
EXPORT_EPUB)
+        .defaultValue(EXPORT_PDF.getValue())
+        .build();
+
+    public static final PropertyDescriptor GOOGLE_SPREADSHEET_EXPORT_TYPE = 
new PropertyDescriptor.Builder()
+        .name("Google Spreadsheet Export Type")
+        .description("Google Spreadsheets cannot be downloaded directly from 
Google Drive but instead must be exported to a specified MIME Type. In the 
event " +
+            "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Spreadsheet, this property specifies the MIME Type to export the 
spreadsheet to.")
+        .required(true)
+        .allowableValues(
+            EXPORT_CSV, EXPORT_MS_EXCEL, EXPORT_PDF_SPREADSHEET,
+            EXPORT_TSV, EXPORT_HTML_SPREADSHEET, EXPORT_OPEN_SPREADSHEET)
+        .defaultValue(EXPORT_CSV.getValue())
+        .build();
+
+    public static final PropertyDescriptor GOOGLE_PRESENTATION_EXPORT_TYPE = 
new PropertyDescriptor.Builder()
+        .name("Google Presentation Export Type")
+        .description("Google Presentations cannot be downloaded directly from 
Google Drive but instead must be exported to a specified MIME Type. In the 
event " +
+            "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Presentation, this property specifies the MIME Type to export the 
presentation to.")
+        .required(true)
+        .allowableValues(
+            EXPORT_PDF, EXPORT_MS_POWERPOINT, EXPORT_PLAIN_TEXT, 
EXPORT_OPEN_PRESENTATION,
+            EXPORT_PNG, EXPORT_JPEG, EXPORT_SVG)
+        .defaultValue(EXPORT_PDF.getValue())
+        .build();
+
+    public static final PropertyDescriptor GOOGLE_DRAWING_EXPORT_TYPE = new 
PropertyDescriptor.Builder()
+        .name("Google Drawing Export Type")
+        .description("Google Drawings cannot be downloaded directly from 
Google Drive but instead must be exported to a specified MIME Type. In the 
event " +
+            "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Drawing, this property specifies the MIME Type to export the drawing 
to.")
+        .required(true)
+        .allowableValues(
+            EXPORT_PDF, EXPORT_PNG_DRAWING, EXPORT_JPEG_DRAWING, 
EXPORT_SVG_DRAWING)
+        .defaultValue(EXPORT_PDF.getValue())
+        .build();
+
+
+
+
+
     public static final Relationship REL_SUCCESS =
             new Relationship.Builder()
                     .name("success")
@@ -103,14 +210,18 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr
                     .build();
 
     private static final List<PropertyDescriptor> PROPERTIES = 
Collections.unmodifiableList(Arrays.asList(
-            GoogleUtils.GCP_CREDENTIALS_PROVIDER_SERVICE,
-            FILE_ID,
-            ProxyConfiguration.createProxyConfigPropertyDescriptor(false, 
ProxyAwareTransportFactory.PROXY_SPECS)
+        GoogleUtils.GCP_CREDENTIALS_PROVIDER_SERVICE,
+        FILE_ID,
+        ProxyConfiguration.createProxyConfigPropertyDescriptor(false, 
ProxyAwareTransportFactory.PROXY_SPECS),
+        GOOGLE_DOC_EXPORT_TYPE,
+        GOOGLE_SPREADSHEET_EXPORT_TYPE,
+        GOOGLE_PRESENTATION_EXPORT_TYPE,
+        GOOGLE_DRAWING_EXPORT_TYPE
     ));
 
     public static final Set<Relationship> RELATIONSHIPS = 
Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
-            REL_SUCCESS,
-            REL_FAILURE
+        REL_SUCCESS,
+        REL_FAILURE
     )));
 
     private volatile Drive driveService;
@@ -147,11 +258,12 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr
 
         final long startNanos = System.nanoTime();
         try {
-            flowFile = fetchFile(fileId, session, flowFile);
-
             final File fileMetadata = fetchFileMetadata(fileId);
-            final Map<String, String> attributes = 
createAttributeMap(fileMetadata);
-            flowFile = session.putAllAttributes(flowFile, attributes);
+            final Map<String, String> attributeMap = 
createAttributeMap(fileMetadata);
+
+            flowFile = fetchFile(fileId, session, context, flowFile, 
attributeMap);
+
+            flowFile = session.putAllAttributes(flowFile, attributeMap);
 
             final String url = DRIVE_URL + fileMetadata.getId();
             final long transferMillis = 
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
@@ -164,18 +276,68 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr
         }
     }
 
-    private FlowFile fetchFile(String fileId, ProcessSession session, FlowFile 
flowFile) throws IOException {
+    private String getExportType(final String mimeType, final ProcessContext 
context) {
+        if (mimeType == null) {
+            return null;
+        }
+
+        switch (mimeType) {
+            case "application/vnd.google-apps.document":
+                return context.getProperty(GOOGLE_DOC_EXPORT_TYPE).getValue();
+            case "application/vnd.google-apps.spreadsheet":
+                return 
context.getProperty(GOOGLE_SPREADSHEET_EXPORT_TYPE).getValue();
+            case "application/vnd.google-apps.presentation":
+                return 
context.getProperty(GOOGLE_PRESENTATION_EXPORT_TYPE).getValue();
+            case "application/vnd.google-apps.drawing":
+                return 
context.getProperty(GOOGLE_DRAWING_EXPORT_TYPE).getValue();
+            case "application/vnd.google-apps.script":
+                return "application/vnd.google-apps.script+json";
+            default:
+                return null;
+        }
+    }
+
+    private FlowFile fetchFile(final String fileId, final ProcessSession 
session, final ProcessContext context, final FlowFile flowFile, final 
Map<String, String> attributeMap) throws IOException {
+        final String mimeType = 
flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
+        final String exportType = getExportType(mimeType, context);
+
+        if (exportType == null) {
+            return downloadFile(fileId, session, flowFile);
+        }
+
+        return exportFile(fileId, exportType, session, flowFile, attributeMap);
+    }
+
+    private FlowFile downloadFile(final String fileId, final ProcessSession 
session, final FlowFile flowFile) throws IOException {
         try (final InputStream driveFileInputStream = driveService
-                .files()
-                .get(fileId)
-                .setSupportsAllDrives(true)
-                .executeMediaAsInputStream()) {
+            .files()
+            .get(fileId)
+            .setSupportsAllDrives(true)
+            .executeMediaAsInputStream()) {
 
             return session.importFrom(driveFileInputStream, flowFile);
         }
     }
 
-    private File fetchFileMetadata(String fileId) throws IOException {
+    private FlowFile exportFile(final String fileId, final String 
exportMimeType, final ProcessSession session, final FlowFile flowFile, final 
Map<String, String> attributeMap) throws IOException {
+        attributeMap.put(CoreAttributes.MIME_TYPE.key(), exportMimeType);
+
+        final String fileExtension = fileExtensions.get(exportMimeType);
+        if (fileExtension != null) {
+            attributeMap.put(CoreAttributes.FILENAME.key(), 
flowFile.getAttribute(CoreAttributes.FILENAME.key()) + fileExtension);
+        }
+
+        try (final InputStream driveFileInputStream = driveService
+            .files()
+            .export(fileId, exportMimeType)
+            .executeMediaAsInputStream()) {
+
+            return session.importFrom(driveFileInputStream, flowFile);
+        }
+    }
+
+
+    private File fetchFileMetadata(final String fileId) throws IOException {
         return driveService
                 .files()
                 .get(fileId)
@@ -184,7 +346,7 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr
                 .execute();
     }
 
-    private void handleErrorResponse(ProcessSession session, String fileId, 
FlowFile flowFile, GoogleJsonResponseException e) {
+    private void handleErrorResponse(final ProcessSession session, final 
String fileId, FlowFile flowFile, final GoogleJsonResponseException e) {
         getLogger().error("Fetching File [{}] failed", fileId, e);
 
         flowFile = session.putAttribute(flowFile, ERROR_CODE, "" + 
e.getStatusCode());
@@ -194,7 +356,7 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr
         session.transfer(flowFile, REL_FAILURE);
     }
 
-    private void handleUnexpectedError(ProcessSession session, FlowFile 
flowFile, String fileId, Exception e) {
+    private void handleUnexpectedError(final ProcessSession session, FlowFile 
flowFile, final String fileId, final Exception e) {
         getLogger().error("Fetching File [{}] failed", fileId, e);
 
         flowFile = session.putAttribute(flowFile, ERROR_MESSAGE, 
e.getMessage());

Reply via email to