This is an automated email from the ASF dual-hosted git repository.
tpalfy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new daefc6d651 NIFI-14280 Handle files having no size metadata in Google
Drive processors
daefc6d651 is described below
commit daefc6d651ac4fc07879501cdfd4a8b0d3aee12a
Author: Peter Turcsanyi <[email protected]>
AuthorDate: Wed Feb 19 16:11:15 2025 +0100
NIFI-14280 Handle files having no size metadata in Google Drive processors
This closes #9731.
Signed-off-by: Tamas Palfy <[email protected]>
---
.../nifi/processors/gcp/drive/FetchGoogleDrive.java | 3 +++
.../processors/gcp/drive/GoogleDriveAttributes.java | 5 ++++-
.../nifi/processors/gcp/drive/GoogleDriveFileInfo.java | 15 +++++++++++++++
.../gcp/drive/GoogleDriveFlowFileAttribute.java | 12 +++---------
.../nifi/processors/gcp/drive/GoogleDriveTrait.java | 3 ++-
.../nifi/processors/gcp/drive/ListGoogleDrive.java | 6 +++++-
.../nifi/processors/gcp/drive/PutGoogleDrive.java | 3 +++
.../nifi/processors/gcp/drive/FetchGoogleDriveIT.java | 1 +
.../gcp/drive/ListGoogleDriveTestRunnerTest.java | 17 ++++++++++++++++-
9 files changed, 52 insertions(+), 13 deletions(-)
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
index d9954e986c..0ea90a01e6 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java
@@ -61,6 +61,8 @@ import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC;
import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC;
@@ -76,6 +78,7 @@ import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTA
@WritesAttribute(attribute = "filename", description = FILENAME_DESC),
@WritesAttribute(attribute = "mime.type", description =
MIME_TYPE_DESC),
@WritesAttribute(attribute = SIZE, description = SIZE_DESC),
+ @WritesAttribute(attribute = SIZE_AVAILABLE, description =
SIZE_AVAILABLE_DESC),
@WritesAttribute(attribute = TIMESTAMP, description = TIMESTAMP_DESC),
@WritesAttribute(attribute = ERROR_CODE, description =
ERROR_CODE_DESC),
@WritesAttribute(attribute = ERROR_MESSAGE, description =
ERROR_MESSAGE_DESC)
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveAttributes.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveAttributes.java
index 6c4eb47fe5..82fc4b239b 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveAttributes.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveAttributes.java
@@ -27,7 +27,10 @@ public class GoogleDriveAttributes {
public static final String FILENAME_DESC = "The name of the file";
public static final String SIZE = "drive.size";
- public static final String SIZE_DESC = "The size of the file";
+ public static final String SIZE_DESC = "The size of the file. Set to 0
when the file size is not available (e.g. externally stored files).";
+
+ public static final String SIZE_AVAILABLE = "drive.size.available";
+ public static final String SIZE_AVAILABLE_DESC = "Indicates if the file
size is known / available";
public static final String TIMESTAMP = "drive.timestamp";
public static final String TIMESTAMP_DESC = "The last modified time or
created time (whichever is greater) of the file." +
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFileInfo.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFileInfo.java
index 7859a57bdd..12058dcbed 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFileInfo.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFileInfo.java
@@ -20,6 +20,7 @@ import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAM
import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE;
import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP;
import org.apache.nifi.processor.util.list.ListableEntity;
@@ -44,6 +45,7 @@ public class GoogleDriveFileInfo implements ListableEntity {
recordFields.add(new RecordField(ID,
RecordFieldType.STRING.getDataType(), false));
recordFields.add(new RecordField(FILENAME,
RecordFieldType.STRING.getDataType(), false));
recordFields.add(new RecordField(SIZE,
RecordFieldType.LONG.getDataType(), false));
+ recordFields.add(new RecordField(SIZE_AVAILABLE,
RecordFieldType.BOOLEAN.getDataType(), false));
recordFields.add(new RecordField(TIMESTAMP,
RecordFieldType.LONG.getDataType(), false));
recordFields.add(new RecordField(MIME_TYPE,
RecordFieldType.STRING.getDataType()));
@@ -53,6 +55,7 @@ public class GoogleDriveFileInfo implements ListableEntity {
private final String id;
private final String fileName;
private final long size;
+ private final boolean sizeAvailable;
private final long createdTime;
private final long modifiedTime;
private final String mimeType;
@@ -65,6 +68,10 @@ public class GoogleDriveFileInfo implements ListableEntity {
return fileName;
}
+ public boolean isSizeAvailable() {
+ return sizeAvailable;
+ }
+
public long getCreatedTime() {
return createdTime;
}
@@ -84,6 +91,7 @@ public class GoogleDriveFileInfo implements ListableEntity {
values.put(ID, getId());
values.put(FILENAME, getName());
values.put(SIZE, getSize());
+ values.put(SIZE_AVAILABLE, isSizeAvailable());
values.put(TIMESTAMP, getTimestamp());
values.put(MIME_TYPE, getMimeType());
@@ -98,6 +106,7 @@ public class GoogleDriveFileInfo implements ListableEntity {
private String id;
private String fileName;
private long size;
+ private boolean sizeAvailable;
private long createdTime;
private long modifiedTime;
private String mimeType;
@@ -117,6 +126,11 @@ public class GoogleDriveFileInfo implements ListableEntity
{
return this;
}
+ public Builder sizeAvailable(boolean sizeAvailable) {
+ this.sizeAvailable = sizeAvailable;
+ return this;
+ }
+
public Builder createdTime(long createdTime) {
this.createdTime = createdTime;
return this;
@@ -172,6 +186,7 @@ public class GoogleDriveFileInfo implements ListableEntity {
this.id = builder.id;
this.fileName = builder.fileName;
this.size = builder.size;
+ this.sizeAvailable = builder.sizeAvailable;
this.createdTime = builder.createdTime;
this.modifiedTime = builder.modifiedTime;
this.mimeType = builder.mimeType;
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFlowFileAttribute.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFlowFileAttribute.java
index 95f07a6a20..6f363d5954 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFlowFileAttribute.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveFlowFileAttribute.java
@@ -18,20 +18,14 @@ package org.apache.nifi.processors.gcp.drive;
import org.apache.nifi.serialization.record.Record;
-import java.util.Optional;
import java.util.function.Function;
public enum GoogleDriveFlowFileAttribute {
ID(GoogleDriveAttributes.ID, GoogleDriveFileInfo::getId),
FILENAME(GoogleDriveAttributes.FILENAME, GoogleDriveFileInfo::getName),
- SIZE(GoogleDriveAttributes.SIZE, fileInfo ->
Optional.ofNullable(fileInfo.getSize())
- .map(String::valueOf)
- .orElse(null)
- ),
- TIMESTAMP(GoogleDriveAttributes.TIMESTAMP, fileInfo ->
Optional.ofNullable(fileInfo.getTimestamp())
- .map(String::valueOf)
- .orElse(null)
- ),
+ SIZE(GoogleDriveAttributes.SIZE, fileInfo ->
String.valueOf(fileInfo.getSize())),
+ SIZE_AVAILABLE(GoogleDriveAttributes.SIZE_AVAILABLE, fileInfo ->
String.valueOf(fileInfo.isSizeAvailable())),
+ TIMESTAMP(GoogleDriveAttributes.TIMESTAMP, fileInfo ->
String.valueOf(fileInfo.getTimestamp())),
MIME_TYPE(GoogleDriveAttributes.MIME_TYPE,
GoogleDriveFileInfo::getMimeType);
private final String name;
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveTrait.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveTrait.java
index c9a19fc42b..8b4a6f97ad 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveTrait.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/GoogleDriveTrait.java
@@ -79,7 +79,8 @@ public interface GoogleDriveTrait {
attributes.put(GoogleDriveAttributes.FILENAME, file.getName());
attributes.put(GoogleDriveAttributes.MIME_TYPE, file.getMimeType());
attributes.put(GoogleDriveAttributes.TIMESTAMP,
String.valueOf(file.getCreatedTime()));
- attributes.put(GoogleDriveAttributes.SIZE,
String.valueOf(file.getSize()));
+ attributes.put(GoogleDriveAttributes.SIZE,
String.valueOf(file.getSize() != null ? file.getSize() : 0L));
+ attributes.put(GoogleDriveAttributes.SIZE_AVAILABLE,
String.valueOf(file.getSize() != null));
return attributes;
}
}
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java
index aea9bd87ed..82f5fef2e2 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java
@@ -69,6 +69,8 @@ import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC;
import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC;
@@ -89,6 +91,7 @@ import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTA
@WritesAttribute(attribute = "filename", description = FILENAME_DESC),
@WritesAttribute(attribute = "mime.type", description =
MIME_TYPE_DESC),
@WritesAttribute(attribute = SIZE, description = SIZE_DESC),
+ @WritesAttribute(attribute = SIZE_AVAILABLE, description =
SIZE_AVAILABLE_DESC),
@WritesAttribute(attribute = TIMESTAMP, description = TIMESTAMP_DESC)})
@Stateful(scopes = {Scope.CLUSTER}, description = "The processor stores
necessary data to be able to keep track what files have been listed already." +
" What exactly needs to be stored depends on the 'Listing Strategy'." +
@@ -276,7 +279,8 @@ public class ListGoogleDrive extends
AbstractListProcessor<GoogleDriveFileInfo>
GoogleDriveFileInfo.Builder builder = new
GoogleDriveFileInfo.Builder()
.id(file.getId())
.fileName(file.getName())
- .size(file.getSize())
+ .size(file.getSize() != null ? file.getSize() : 0L)
+ .sizeAvailable(file.getSize() != null)
.createdTime(Optional.ofNullable(file.getCreatedTime()).map(DateTime::getValue).orElse(0L))
.modifiedTime(Optional.ofNullable(file.getModifiedTime()).map(DateTime::getValue).orElse(0L))
.mimeType(file.getMimeType());
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java
index f8a8ba13ef..07797ed603 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java
@@ -88,6 +88,8 @@ import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC;
import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE;
+import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_AVAILABLE_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP;
import static
org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC;
@@ -103,6 +105,7 @@ import static
org.apache.nifi.processors.gcp.util.GoogleUtils.GCP_CREDENTIALS_PR
@WritesAttribute(attribute = "filename", description = FILENAME_DESC),
@WritesAttribute(attribute = "mime.type", description =
MIME_TYPE_DESC),
@WritesAttribute(attribute = SIZE, description = SIZE_DESC),
+ @WritesAttribute(attribute = SIZE_AVAILABLE, description =
SIZE_AVAILABLE_DESC),
@WritesAttribute(attribute = TIMESTAMP, description = TIMESTAMP_DESC),
@WritesAttribute(attribute = ERROR_CODE, description =
ERROR_CODE_DESC),
@WritesAttribute(attribute = ERROR_MESSAGE, description =
ERROR_MESSAGE_DESC)})
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveIT.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveIT.java
index cb750744fe..8b62a4474b 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveIT.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDriveIT.java
@@ -48,6 +48,7 @@ public class FetchGoogleDriveIT extends
AbstractGoogleDriveIT<FetchGoogleDrive>
inputFlowFileAttributes.put(GoogleDriveAttributes.ID, file.getId());
inputFlowFileAttributes.put(GoogleDriveAttributes.FILENAME,
file.getName());
inputFlowFileAttributes.put(GoogleDriveAttributes.SIZE,
valueOf(DEFAULT_FILE_CONTENT.length()));
+ inputFlowFileAttributes.put(GoogleDriveAttributes.SIZE_AVAILABLE,
"true");
inputFlowFileAttributes.put(GoogleDriveAttributes.MIME_TYPE,
"text/plain");
HashSet<Map<String, String>> expectedAttributes = new
HashSet<>(singletonList(inputFlowFileAttributes));
diff --git
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/ListGoogleDriveTestRunnerTest.java
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/ListGoogleDriveTestRunnerTest.java
index f883d2029c..7ea5aa554e 100644
---
a/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/ListGoogleDriveTestRunnerTest.java
+++
b/nifi-extension-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/test/java/org/apache/nifi/processors/gcp/drive/ListGoogleDriveTestRunnerTest.java
@@ -93,6 +93,7 @@ public class ListGoogleDriveTestRunnerTest implements
OutputChecker {
testOutputAsAttributes(id, filename, size, createdTime, modifiedTime,
mimeType, createdTime);
}
+
@Test
void testOutputAsAttributesWhereTimestampIsModifiedTime() throws Exception
{
String id = "id_1";
@@ -105,6 +106,18 @@ public class ListGoogleDriveTestRunnerTest implements
OutputChecker {
testOutputAsAttributes(id, filename, size, createdTime, modifiedTime,
mimeType, modifiedTime);
}
+ @Test
+ void testOutputAsAttributesWhereSizeIsNotAvailable() throws Exception {
+ String id = "id_1";
+ String filename = "file_name_1";
+ Long size = null;
+ Long createdTime = 123456L;
+ Long modifiedTime = 123456L + 1L;
+ String mimeType = "mime_type_1";
+
+ testOutputAsAttributes(id, filename, size, createdTime, modifiedTime,
mimeType, modifiedTime);
+ }
+
@Test
void testOutputAsContent() throws Exception {
String id = "id_1";
@@ -124,6 +137,7 @@ public class ListGoogleDriveTestRunnerTest implements
OutputChecker {
"\"drive.id\":\"" + id + "\"," +
"\"filename\":\"" + filename + "\"," +
"\"drive.size\":" + size + "," +
+ "\"drive.size.available\":" + (size != null) + "," +
"\"drive.timestamp\":" + modifiedTime + "," +
"\"mime.type\":\"" + mimeType + "\"" +
"}" +
@@ -168,7 +182,8 @@ public class ListGoogleDriveTestRunnerTest implements
OutputChecker {
Map<String, String> inputFlowFileAttributes = new HashMap<>();
inputFlowFileAttributes.put(GoogleDriveAttributes.ID, id);
inputFlowFileAttributes.put(GoogleDriveAttributes.FILENAME, filename);
- inputFlowFileAttributes.put(GoogleDriveAttributes.SIZE, valueOf(size));
+ inputFlowFileAttributes.put(GoogleDriveAttributes.SIZE, valueOf(size
!= null ? size : 0L));
+ inputFlowFileAttributes.put(GoogleDriveAttributes.SIZE_AVAILABLE,
valueOf(size != null));
inputFlowFileAttributes.put(GoogleDriveAttributes.TIMESTAMP,
valueOf(expectedTimestamp));
inputFlowFileAttributes.put(GoogleDriveAttributes.MIME_TYPE, mimeType);