This is an automated email from the ASF dual-hosted git repository.

tpalfy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new 5697523b41 NIFI-14287 FetchGoogleDrive supports downloading of 10MB+ 
Google files
5697523b41 is described below

commit 5697523b41dbc12d1f3923507441fec3d211d161
Author: Peter Turcsanyi <[email protected]>
AuthorDate: Thu Feb 20 18:36:18 2025 +0100

    NIFI-14287 FetchGoogleDrive supports downloading of 10MB+ Google files
    
    Also updated the export formats (removed unsupported export types, add 
Google Forms export).
    
    This closes #9735.
    
    Signed-off-by: Tamas Palfy <[email protected]>
---
 .../processors/gcp/drive/FetchGoogleDrive.java     | 103 ++++++++++++++-------
 .../processors/gcp/drive/FetchGoogleDriveTest.java |   8 +-
 2 files changed, 78 insertions(+), 33 deletions(-)

diff --git 
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
 
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
index 118acf70f3..6bedfe3865 100644
--- 
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
+++ 
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
@@ -17,6 +17,10 @@
 package org.apache.nifi.processors.gcp.drive;
 
 import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.GenericUrl;
+import com.google.api.client.http.HttpRequest;
+import com.google.api.client.http.HttpRequestFactory;
+import com.google.api.client.http.HttpResponse;
 import com.google.api.services.drive.Drive;
 import com.google.api.services.drive.DriveScopes;
 import com.google.api.services.drive.model.File;
@@ -112,39 +116,36 @@ import static 
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTA
 )
 public class FetchGoogleDrive extends AbstractProcessor implements 
GoogleDriveTrait {
 
+    private static final long EXPORT_SIZE_LIMIT = 10_000_000;
+    private static final String EXPORT_SIZE_ERROR = "exportSizeLimitExceeded";
+
     // Google Docs Export Types
     private static final AllowableValue EXPORT_MS_WORD = new 
AllowableValue("application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 "Microsoft Word");
-    private static final AllowableValue EXPORT_OPEN_DOCUMENT = new 
AllowableValue("application/vnd.oasis.opendocument.text", "OpenDocument");
-    private static final AllowableValue EXPORT_PDF = new 
AllowableValue("application/pdf", "PDF");
+    private static final AllowableValue EXPORT_OPEN_DOCUMENT = new 
AllowableValue("application/vnd.oasis.opendocument.text", "OpenDocument Text");
     private static final AllowableValue EXPORT_RICH_TEXT = new 
AllowableValue("application/rtf", "Rich Text");
     private static final AllowableValue EXPORT_EPUB = new 
AllowableValue("application/epub+zip", "EPUB");
 
     // Shared Export Types
-    private static final AllowableValue EXPORT_HTML_DOC = new 
AllowableValue("application/zip", "Web Page (HTML)");
+    private static final AllowableValue EXPORT_PDF = new 
AllowableValue("application/pdf", "PDF");
+    private static final AllowableValue EXPORT_HTML = new 
AllowableValue("application/zip", "Web Page (HTML)");
     private static final AllowableValue EXPORT_PLAIN_TEXT = new 
AllowableValue("text/plain", "Plain Text");
 
     // Google Spreadsheet Export Types
     private static final AllowableValue EXPORT_MS_EXCEL = new 
AllowableValue("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
 "Microsoft Excel");
     private static final AllowableValue EXPORT_OPEN_SPREADSHEET = new 
AllowableValue("application/x-vnd.oasis.opendocument.spreadsheet", 
"OpenDocument Spreadsheet");
-    private static final AllowableValue EXPORT_PDF_SPREADSHEET = new 
AllowableValue("application/pdf", "PDF");
     private static final AllowableValue EXPORT_CSV = new 
AllowableValue("text/csv", "CSV (first sheet only)",
         "Comma-separated values. Only the first sheet will be exported.");
     private static final AllowableValue EXPORT_TSV = new 
AllowableValue("text/tab-separated-values", "TSV (first sheet only)",
         "Tab-separate values. Only the first sheet will be exported.");
-    private static final AllowableValue EXPORT_HTML_SPREADSHEET = new 
AllowableValue("text/html", "Web Page (HTML)");
 
     // Google Presentation Export Types
     private static final AllowableValue EXPORT_MS_POWERPOINT = new 
AllowableValue("application/vnd.openxmlformats-officedocument.presentationml.presentation",
 "Microsoft PowerPoint");
     private static final AllowableValue EXPORT_OPEN_PRESENTATION = new 
AllowableValue("application/vnd.oasis.opendocument.presentation", "OpenDocument 
Presentation");
-    private static final AllowableValue EXPORT_PNG = new 
AllowableValue("image/png", "PNG (first slide only)");
-    private static final AllowableValue EXPORT_JPEG = new 
AllowableValue("image/jpeg", "JPEG (first slide only)");
-    private static final AllowableValue EXPORT_SVG = new 
AllowableValue("image/svg+xml", "SVG (first slide only)",
-        "Scalable Vector Graphics. Only the first slide will be exported.");
 
     // Drawings Export Types
-    private static final AllowableValue EXPORT_PNG_DRAWING = new 
AllowableValue("image/png", "PNG");
-    private static final AllowableValue EXPORT_JPEG_DRAWING = new 
AllowableValue("image/jpeg", "JPEG");
-    private static final AllowableValue EXPORT_SVG_DRAWING = new 
AllowableValue("image/svg+xml", "SVG");
+    private static final AllowableValue EXPORT_PNG = new 
AllowableValue("image/png", "PNG");
+    private static final AllowableValue EXPORT_JPEG = new 
AllowableValue("image/jpeg", "JPEG");
+    private static final AllowableValue EXPORT_SVG = new 
AllowableValue("image/svg+xml", "SVG");
 
     private static final Map<String, String> fileExtensions = new HashMap<>();
     static {
@@ -153,7 +154,7 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
         fileExtensions.put(EXPORT_PDF.getValue(), ".pdf");
         fileExtensions.put(EXPORT_RICH_TEXT.getValue(), ".rtf");
         fileExtensions.put(EXPORT_EPUB.getValue(), ".epub");
-        fileExtensions.put(EXPORT_HTML_DOC.getValue(), ".zip");
+        fileExtensions.put(EXPORT_HTML.getValue(), ".zip");
         fileExtensions.put(EXPORT_PLAIN_TEXT.getValue(), ".txt");
         fileExtensions.put(EXPORT_MS_EXCEL.getValue(), ".xlsx");
         fileExtensions.put(EXPORT_OPEN_SPREADSHEET.getValue(), ".ods");
@@ -184,8 +185,8 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Document, this property specifies the MIME Type to export the document 
to.")
         .required(true)
         .allowableValues(
-            EXPORT_PDF, EXPORT_PLAIN_TEXT, EXPORT_MS_WORD,
-            EXPORT_OPEN_DOCUMENT, EXPORT_RICH_TEXT, EXPORT_HTML_DOC, 
EXPORT_EPUB)
+            EXPORT_PDF, EXPORT_MS_WORD, EXPORT_OPEN_DOCUMENT,
+            EXPORT_PLAIN_TEXT, EXPORT_RICH_TEXT, EXPORT_HTML, EXPORT_EPUB)
         .defaultValue(EXPORT_PDF.getValue())
         .build();
 
@@ -195,9 +196,9 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Spreadsheet, this property specifies the MIME Type to export the 
spreadsheet to.")
         .required(true)
         .allowableValues(
-            EXPORT_CSV, EXPORT_MS_EXCEL, EXPORT_PDF_SPREADSHEET,
-            EXPORT_TSV, EXPORT_HTML_SPREADSHEET, EXPORT_OPEN_SPREADSHEET)
-        .defaultValue(EXPORT_CSV.getValue())
+            EXPORT_PDF, EXPORT_MS_EXCEL, EXPORT_OPEN_SPREADSHEET,
+            EXPORT_CSV, EXPORT_TSV, EXPORT_HTML)
+        .defaultValue(EXPORT_PDF.getValue())
         .build();
 
     public static final PropertyDescriptor GOOGLE_PRESENTATION_EXPORT_TYPE = 
new PropertyDescriptor.Builder()
@@ -206,8 +207,7 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Presentation, this property specifies the MIME Type to export the 
presentation to.")
         .required(true)
         .allowableValues(
-            EXPORT_PDF, EXPORT_MS_POWERPOINT, EXPORT_PLAIN_TEXT, 
EXPORT_OPEN_PRESENTATION,
-            EXPORT_PNG, EXPORT_JPEG, EXPORT_SVG)
+            EXPORT_PDF, EXPORT_MS_POWERPOINT, EXPORT_OPEN_PRESENTATION, 
EXPORT_PLAIN_TEXT)
         .defaultValue(EXPORT_PDF.getValue())
         .build();
 
@@ -217,7 +217,7 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             "that the incoming FlowFile's MIME Type indicates that the file is 
a Google Drawing, this property specifies the MIME Type to export the drawing 
to.")
         .required(true)
         .allowableValues(
-            EXPORT_PDF, EXPORT_PNG_DRAWING, EXPORT_JPEG_DRAWING, 
EXPORT_SVG_DRAWING)
+            EXPORT_PDF, EXPORT_PNG, EXPORT_JPEG, EXPORT_SVG)
         .defaultValue(EXPORT_PDF.getValue())
         .build();
 
@@ -287,7 +287,7 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             final File fileMetadata = fetchFileMetadata(fileId);
             final Map<String, String> attributeMap = 
createAttributeMap(fileMetadata);
 
-            flowFile = fetchFile(fileId, session, context, flowFile, 
attributeMap);
+            flowFile = fetchFile(fileMetadata, session, context, flowFile, 
attributeMap);
 
             flowFile = session.putAllAttributes(flowFile, attributeMap);
 
@@ -312,20 +312,22 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             case "application/vnd.google-apps.spreadsheet" -> 
context.getProperty(GOOGLE_SPREADSHEET_EXPORT_TYPE).getValue();
             case "application/vnd.google-apps.presentation" -> 
context.getProperty(GOOGLE_PRESENTATION_EXPORT_TYPE).getValue();
             case "application/vnd.google-apps.drawing" -> 
context.getProperty(GOOGLE_DRAWING_EXPORT_TYPE).getValue();
+            case "application/vnd.google-apps.form" -> "application/zip";
             case "application/vnd.google-apps.script" -> 
"application/vnd.google-apps.script+json";
             default -> null;
         };
     }
 
-    private FlowFile fetchFile(final String fileId, final ProcessSession 
session, final ProcessContext context, final FlowFile flowFile, final 
Map<String, String> attributeMap) throws IOException {
-        final String mimeType = 
flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
+    private FlowFile fetchFile(final File fileMetadata, final ProcessSession 
session, final ProcessContext context, final FlowFile flowFile, final 
Map<String, String> attributeMap)
+            throws IOException {
+        final String mimeType = fileMetadata.getMimeType();
         final String exportType = getExportType(mimeType, context);
 
         if (exportType == null) {
-            return downloadFile(fileId, session, flowFile);
+            return downloadFile(fileMetadata.getId(), session, flowFile);
         }
 
-        return exportFile(fileId, exportType, session, flowFile, attributeMap);
+        return exportFile(fileMetadata, exportType, session, flowFile, 
attributeMap);
     }
 
     private FlowFile downloadFile(final String fileId, final ProcessSession 
session, final FlowFile flowFile) throws IOException {
@@ -339,7 +341,8 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
         }
     }
 
-    private FlowFile exportFile(final String fileId, final String 
exportMimeType, final ProcessSession session, final FlowFile flowFile, final 
Map<String, String> attributeMap) throws IOException {
+    private FlowFile exportFile(final File fileMetadata, final String 
exportMimeType, final ProcessSession session, final FlowFile flowFile, final 
Map<String, String> attributeMap)
+            throws IOException {
         attributeMap.put(CoreAttributes.MIME_TYPE.key(), exportMimeType);
 
         final String fileExtension = fileExtensions.get(exportMimeType);
@@ -347,22 +350,58 @@ public class FetchGoogleDrive extends AbstractProcessor 
implements GoogleDriveTr
             attributeMap.put(CoreAttributes.FILENAME.key(), 
flowFile.getAttribute(CoreAttributes.FILENAME.key()) + fileExtension);
         }
 
-        try (final InputStream driveFileInputStream = driveService
-            .files()
-            .export(fileId, exportMimeType)
-            .executeMediaAsInputStream()) {
+        if (fileMetadata.getSize() == null || fileMetadata.getSize() < 
EXPORT_SIZE_LIMIT) {
+            try (final InputStream driveFileInputStream = driveService
+                    .files()
+                    .export(fileMetadata.getId(), exportMimeType)
+                    .executeMediaAsInputStream()) {
+
+                return session.importFrom(driveFileInputStream, flowFile);
+            } catch (GoogleJsonResponseException e) {
+                if (!e.getContent().contains(EXPORT_SIZE_ERROR)) {
+                    throw e;
+                }
+                // fall back to export link
+            }
+        }
+
+        final String exportLink = getExportLink(fileMetadata, exportMimeType);
+
+        final HttpRequestFactory requestFactory = 
driveService.getRequestFactory();
+        final HttpRequest request = requestFactory.buildGetRequest(new 
GenericUrl(exportLink));
+        final HttpResponse response = request.execute();
 
+        try (final InputStream driveFileInputStream = response.getContent()) {
             return session.importFrom(driveFileInputStream, flowFile);
         }
     }
 
+    private String getExportLink(final File fileMetadata, final String 
exportType) {
+        if (exportType == null) {
+            return null;
+        }
+
+        final Map<String, String> exportLinks = fileMetadata.getExportLinks();
+        if (exportLinks == null) {
+            throw new ProcessException(String.format("Export links cannot be 
found for file [id=%s, mimeType=%s]",
+                    fileMetadata.getId(), fileMetadata.getMimeType()));
+        }
+
+        final String exportLink = 
fileMetadata.getExportLinks().get(exportType);
+        if (exportLink == null) {
+            throw new ProcessException(String.format("Export link cannot be 
found for file [id=%s, mimeType=%s, exportType=%s, exportLinks=%s]",
+                    fileMetadata.getId(), fileMetadata.getMimeType(), 
exportType, exportLinks));
+        }
+
+        return exportLink;
+    }
 
     private File fetchFileMetadata(final String fileId) throws IOException {
         return driveService
                 .files()
                 .get(fileId)
                 .setSupportsAllDrives(true)
-                .setFields("id, name, createdTime, mimeType, size")
+                .setFields("id, name, createdTime, mimeType, size, 
exportLinks")
                 .execute();
     }
 
diff --git 
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveTest.java
 
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveTest.java
index e6df5f6901..b2bd9716a6 100644
--- 
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveTest.java
+++ 
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveTest.java
@@ -104,7 +104,7 @@ public class FetchGoogleDriveTest extends 
AbstractGoogleDriveTest {
         when(mockDriverService.files()
                 .get(fileId)
                 .setSupportsAllDrives(true)
-                .setFields("id, name, createdTime, mimeType, size")
+                .setFields("id, name, createdTime, mimeType, size, 
exportLinks")
                 .execute()).thenReturn(createFile());
     }
 
@@ -114,6 +114,12 @@ public class FetchGoogleDriveTest extends 
AbstractGoogleDriveTest {
                 .setSupportsAllDrives(true)
                 .executeMediaAsInputStream())
                 .thenThrow(exception);
+
+        when(mockDriverService.files()
+                .get(fileId)
+                .setSupportsAllDrives(true)
+                .setFields("id, name, createdTime, mimeType, size, 
exportLinks")
+                .execute()).thenReturn(createFile());
     }
 
     private void runWithFlowFile() {

Reply via email to