This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new b6059d9502 GCS folder modified date and config option. fixes #5843
(#5851)
b6059d9502 is described below
commit b6059d95028cde94efc5ec7a80a9a30b6af753e5
Author: Bart Maertens <[email protected]>
AuthorDate: Tue Oct 21 11:43:50 2025 +0200
GCS folder modified date and config option. fixes #5843 (#5851)
* GCS folder modified date and config option. fixes #5843
* added internationalization. fixes #5843
* Allow an env variable to turn on GCP folder last modification date
---------
Co-authored-by: Hans Van Akelyen <[email protected]>
---
core/src/main/java/org/apache/hop/core/Const.java | 7 ++++
.../apache/hop/vfs/gs/GoogleStorageFileObject.java | 41 ++++++++++++++++++++--
.../hop/vfs/gs/config/GoogleCloudConfig.java | 3 ++
.../hop/vfs/gs/config/GoogleCloudConfigPlugin.java | 26 ++++++++++++++
.../gs/config/messages/messages_en_US.properties | 4 ++-
5 files changed, 78 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/org/apache/hop/core/Const.java
b/core/src/main/java/org/apache/hop/core/Const.java
index e68397f4c4..e451159332 100644
--- a/core/src/main/java/org/apache/hop/core/Const.java
+++ b/core/src/main/java/org/apache/hop/core/Const.java
@@ -885,6 +885,13 @@ public class Const {
"A variable to configure the maximum number of characters of text
that are extracted before an exception is thrown during extracting text from
documents")
public static final String HOP_ZIP_MAX_TEXT_SIZE = "HOP_ZIP_MAX_TEXT_SIZE";
+ /**
+ * A variable to configure if we should calculate the last modification date
of a folder object
+ * for Google Cloud Storage.
+ */
+ public static final String HOP_GCP_GET_FOLDER_LASTMODIFICATION_DATE =
+ "HOP_GCP_GET_FOLDER_LASTMODIFICATION_DATE";
+
/**
* The default value for the {@link #HOP_ZIP_MAX_TEXT_SIZE} as a Long.
*
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
index 002190de27..5f88b9f09e 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
@@ -29,6 +29,9 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.time.Instant;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@@ -36,6 +39,9 @@ import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.provider.AbstractFileName;
import org.apache.commons.vfs2.provider.AbstractFileObject;
+import org.apache.hop.core.Const;
+import org.apache.hop.vfs.gs.config.GoogleCloudConfig;
+import org.apache.hop.vfs.gs.config.GoogleCloudConfigSingleton;
public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFileSystem> {
@@ -195,9 +201,16 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
@Override
protected long doGetLastModifiedTime() throws Exception {
if (hasObject()) {
+ GoogleCloudConfig config = GoogleCloudConfigSingleton.getConfig();
if (isFolder()) {
- // getting the update time of a folder gives an NPE
- return 0;
+ // Only return the last modified time for a folder if the user wants
to scan for it.
+
+ if (Boolean.TRUE.equals(config.getScanFoldersForLastModifDate())
+ ||
Const.toBoolean(System.getenv(Const.HOP_GCP_GET_FOLDER_LASTMODIFICATION_DATE)))
{
+ return getLatestModifiedFileTime().toInstant().toEpochMilli();
+ } else {
+ return 0;
+ }
}
return blob.getUpdateTime();
}
@@ -300,6 +313,13 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
return name;
}
+ String stripLeadingSlash(String name) {
+ if (name.startsWith("/")) {
+ return name.substring(1);
+ }
+ return name;
+ }
+
String lastPathElement(String name) {
int idx = name.lastIndexOf('/');
if (idx > -1) {
@@ -322,4 +342,21 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
public int hashCode() {
return Objects.hash(getName().getPath());
}
+
+ private OffsetDateTime getLatestModifiedFileTime() {
+ Storage storage = getAbstractFileSystem().setupStorage();
+ OffsetDateTime latest = OffsetDateTime.ofInstant(Instant.EPOCH,
ZoneOffset.UTC);
+ Page<Blob> page =
+ storage.list(
+ bucketName,
BlobListOption.prefix(stripLeadingSlash(appendTrailingSlash(bucketPath))));
+ for (Blob blob : page.iterateAll()) {
+ if (!blob.isDirectory()) {
+ OffsetDateTime updated = blob.getUpdateTimeOffsetDateTime();
+ if (updated != null && updated.isAfter(latest)) {
+ latest = updated;
+ }
+ }
+ }
+ return latest;
+ }
}
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
index c4a0023b3d..c44e5cbe5c 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
@@ -27,6 +27,7 @@ public class GoogleCloudConfig {
public static final String HOP_CONFIG_GOOGLE_CLOUD_CONFIG_KEY =
"googleCloud";
private String serviceAccountKeyFile;
+ private Boolean scanFoldersForLastModifDate;
private String maxAttempts;
private String initialRetryDelay;
private String retryDelayMultiplier;
@@ -39,6 +40,7 @@ public class GoogleCloudConfig {
private String readTimeout;
public GoogleCloudConfig() {
+ scanFoldersForLastModifDate = false;
maxAttempts = "6";
initialRetryDelay = "1";
retryDelayMultiplier = "2.0";
@@ -54,6 +56,7 @@ public class GoogleCloudConfig {
public GoogleCloudConfig(GoogleCloudConfig config) {
this();
serviceAccountKeyFile = config.serviceAccountKeyFile;
+ scanFoldersForLastModifDate = config.scanFoldersForLastModifDate;
maxAttempts = config.maxAttempts;
initialRetryDelay = config.initialRetryDelay;
retryDelayMultiplier = config.retryDelayMultiplier;
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
index 5780006a3a..4f423b42e9 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
@@ -35,6 +35,7 @@ import
org.apache.hop.ui.core.gui.IGuiPluginCompositeWidgetsListener;
import org.apache.hop.ui.core.widget.TextVar;
import org.apache.hop.ui.hopgui.HopGui;
import
org.apache.hop.ui.hopgui.perspective.configuration.tabs.ConfigPluginOptionsTab;
+import org.eclipse.swt.widgets.Button;
import org.eclipse.swt.widgets.Control;
import picocli.CommandLine;
@@ -51,6 +52,8 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
private static final String WIDGET_ID_GOOGLE_CLOUD_SERVICE_ACCOUNT_KEY_FILE =
"10000-google-cloud-service-account-key-file";
+ private static final String
WIDGET_ID_GOOGLE_CLOUD_SERVICE_SCAN_FOLDERS_FOR_MODIF_DATE =
+ "10010-google-cloud-service-scan-folders-for-modification-date";
private static final String WIDGET_ID_GOOGLE_CLOUD_SERVICE_MAX_ATTEMPTS =
"10100-google-cloud-service-max-attempts";
private static final String
WIDGET_ID_GOOGLE_CLOUD_SERVICE_INITIAL_RETRY_DELAY =
@@ -84,6 +87,15 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
description = "Configure the path to a Google Cloud service account JSON
key file")
private String serviceAccountKeyFile;
+ @GuiWidgetElement(
+ id = WIDGET_ID_GOOGLE_CLOUD_SERVICE_SCAN_FOLDERS_FOR_MODIF_DATE,
+ parentId = ConfigPluginOptionsTab.GUI_WIDGETS_PARENT_ID,
+ type = GuiElementType.CHECKBOX,
+ variables = false,
+ label =
"i18n::GoogleCloudPlugin.ScanFolderForLastModificationDate.Label",
+ toolTip =
"i18n::GoogleCloudPlugin.ScanFolderForLastModificationDate.Description")
+ private Boolean scanFoldersForModificationDate;
+
@GuiWidgetElement(
id = WIDGET_ID_GOOGLE_CLOUD_SERVICE_MAX_ATTEMPTS,
parentId = ConfigPluginOptionsTab.GUI_WIDGETS_PARENT_ID,
@@ -184,6 +196,7 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
GoogleCloudConfig config = GoogleCloudConfigSingleton.getConfig();
instance.serviceAccountKeyFile = config.getServiceAccountKeyFile();
+ instance.scanFoldersForModificationDate =
config.getScanFoldersForLastModifDate();
instance.maxAttempts = config.getMaxAttempts();
instance.initialRetryDelay = config.getInitialRetryDelay();
instance.retryDelayMultiplier = config.getRetryDelayMultiplier();
@@ -216,6 +229,14 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
changed = true;
}
+ if (scanFoldersForModificationDate != null
+ && scanFoldersForModificationDate.equals(Boolean.TRUE)) {
+ config.setScanFoldersForLastModifDate(scanFoldersForModificationDate);
+ log.logBasic(
+ "Google Cloud Storage service will scan folders for the last file
modification time.");
+ changed = true;
+ }
+
if (maxAttempts != null) {
config.setMaxAttempts(maxAttempts);
log.logBasic("Google Cloud service max attempts set to " +
maxAttempts);
@@ -312,6 +333,11 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
serviceAccountKeyFile = ((TextVar) control).getText();
GoogleCloudConfigSingleton.getConfig().setServiceAccountKeyFile(serviceAccountKeyFile);
break;
+ case WIDGET_ID_GOOGLE_CLOUD_SERVICE_SCAN_FOLDERS_FOR_MODIF_DATE:
+ scanFoldersForModificationDate = ((Button) control).getSelection();
+ GoogleCloudConfigSingleton.getConfig()
+ .setScanFoldersForLastModifDate(scanFoldersForModificationDate);
+ break;
case WIDGET_ID_GOOGLE_CLOUD_SERVICE_MAX_ATTEMPTS:
maxAttempts = ((TextVar) control).getText();
GoogleCloudConfigSingleton.getConfig().setMaxAttempts(maxAttempts);
diff --git
a/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
b/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
index 99813db674..d0bec7be27 100644
---
a/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
+++
b/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
@@ -39,4 +39,6 @@ GoogleCloudPlugin.MaxRpcTimeout.Description=Max RPC Timeout
GoogleCloudPlugin.ConnectTimeout.Label=Connect Timeout
GoogleCloudPlugin.ConnectTimeout.Description=Connect Timeout
GoogleCloudPlugin.ReadTimeout.Label=Read Timeout
-GoogleCloudPlugin.ReadTimeout.Description=Read Timeout
\ No newline at end of file
+GoogleCloudPlugin.ReadTimeout.Description=Read Timeout
+GoogleCloudPlugin.ScanFolderForLastModificationDate.Label=Scan folders to find
last modified date
+GoogleCloudPlugin.ScanFolderForLastModificationDate.Description=Scans all
files in a folder and uses the most recent modification date as the folder
modification date. This may have a performance impact in buckets with lots of
files.
\ No newline at end of file