This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/main by this push:
     new b6059d9502 GCS folder modified date and config option. fixes #5843 
(#5851)
b6059d9502 is described below

commit b6059d95028cde94efc5ec7a80a9a30b6af753e5
Author: Bart Maertens <[email protected]>
AuthorDate: Tue Oct 21 11:43:50 2025 +0200

    GCS folder modified date and config option. fixes #5843 (#5851)
    
    * GCS folder modified date and config option. fixes #5843
    
    * added internationalization. fixes #5843
    
    * Allow an env variable to turn on GCP folder last modification date
    
    ---------
    
    Co-authored-by: Hans Van Akelyen <[email protected]>
---
 core/src/main/java/org/apache/hop/core/Const.java  |  7 ++++
 .../apache/hop/vfs/gs/GoogleStorageFileObject.java | 41 ++++++++++++++++++++--
 .../hop/vfs/gs/config/GoogleCloudConfig.java       |  3 ++
 .../hop/vfs/gs/config/GoogleCloudConfigPlugin.java | 26 ++++++++++++++
 .../gs/config/messages/messages_en_US.properties   |  4 ++-
 5 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/hop/core/Const.java 
b/core/src/main/java/org/apache/hop/core/Const.java
index e68397f4c4..e451159332 100644
--- a/core/src/main/java/org/apache/hop/core/Const.java
+++ b/core/src/main/java/org/apache/hop/core/Const.java
@@ -885,6 +885,13 @@ public class Const {
           "A variable to configure the maximum number of characters of text 
that are extracted before an exception is thrown during extracting text from 
documents")
   public static final String HOP_ZIP_MAX_TEXT_SIZE = "HOP_ZIP_MAX_TEXT_SIZE";
 
+  /**
+   * A variable to configure if we should calculate the last modification date 
of a folder object
+   * for Google Cloud Storage.
+   */
+  public static final String HOP_GCP_GET_FOLDER_LASTMODIFICATION_DATE =
+      "HOP_GCP_GET_FOLDER_LASTMODIFICATION_DATE";
+
   /**
    * The default value for the {@link #HOP_ZIP_MAX_TEXT_SIZE} as a Long.
    *
diff --git 
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
 
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
index 002190de27..5f88b9f09e 100644
--- 
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
+++ 
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
@@ -29,6 +29,9 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.time.Instant;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
@@ -36,6 +39,9 @@ import org.apache.commons.vfs2.FileObject;
 import org.apache.commons.vfs2.FileType;
 import org.apache.commons.vfs2.provider.AbstractFileName;
 import org.apache.commons.vfs2.provider.AbstractFileObject;
+import org.apache.hop.core.Const;
+import org.apache.hop.vfs.gs.config.GoogleCloudConfig;
+import org.apache.hop.vfs.gs.config.GoogleCloudConfigSingleton;
 
 public class GoogleStorageFileObject extends 
AbstractFileObject<GoogleStorageFileSystem> {
 
@@ -195,9 +201,16 @@ public class GoogleStorageFileObject extends 
AbstractFileObject<GoogleStorageFil
   @Override
   protected long doGetLastModifiedTime() throws Exception {
     if (hasObject()) {
+      GoogleCloudConfig config = GoogleCloudConfigSingleton.getConfig();
       if (isFolder()) {
-        // getting the update time of a folder gives an NPE
-        return 0;
+        // Only return the last modified time for a folder if the user wants 
to scan for it.
+
+        if (Boolean.TRUE.equals(config.getScanFoldersForLastModifDate())
+            || 
Const.toBoolean(System.getenv(Const.HOP_GCP_GET_FOLDER_LASTMODIFICATION_DATE))) 
{
+          return getLatestModifiedFileTime().toInstant().toEpochMilli();
+        } else {
+          return 0;
+        }
       }
       return blob.getUpdateTime();
     }
@@ -300,6 +313,13 @@ public class GoogleStorageFileObject extends 
AbstractFileObject<GoogleStorageFil
     return name;
   }
 
+  String stripLeadingSlash(String name) {
+    if (name.startsWith("/")) {
+      return name.substring(1);
+    }
+    return name;
+  }
+
   String lastPathElement(String name) {
     int idx = name.lastIndexOf('/');
     if (idx > -1) {
@@ -322,4 +342,21 @@ public class GoogleStorageFileObject extends 
AbstractFileObject<GoogleStorageFil
   public int hashCode() {
     return Objects.hash(getName().getPath());
   }
+
+  private OffsetDateTime getLatestModifiedFileTime() {
+    Storage storage = getAbstractFileSystem().setupStorage();
+    OffsetDateTime latest = OffsetDateTime.ofInstant(Instant.EPOCH, 
ZoneOffset.UTC);
+    Page<Blob> page =
+        storage.list(
+            bucketName, 
BlobListOption.prefix(stripLeadingSlash(appendTrailingSlash(bucketPath))));
+    for (Blob blob : page.iterateAll()) {
+      if (!blob.isDirectory()) {
+        OffsetDateTime updated = blob.getUpdateTimeOffsetDateTime();
+        if (updated != null && updated.isAfter(latest)) {
+          latest = updated;
+        }
+      }
+    }
+    return latest;
+  }
 }
diff --git 
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
 
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
index c4a0023b3d..c44e5cbe5c 100644
--- 
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
+++ 
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
@@ -27,6 +27,7 @@ public class GoogleCloudConfig {
   public static final String HOP_CONFIG_GOOGLE_CLOUD_CONFIG_KEY = 
"googleCloud";
 
   private String serviceAccountKeyFile;
+  private Boolean scanFoldersForLastModifDate;
   private String maxAttempts;
   private String initialRetryDelay;
   private String retryDelayMultiplier;
@@ -39,6 +40,7 @@ public class GoogleCloudConfig {
   private String readTimeout;
 
   public GoogleCloudConfig() {
+    scanFoldersForLastModifDate = false;
     maxAttempts = "6";
     initialRetryDelay = "1";
     retryDelayMultiplier = "2.0";
@@ -54,6 +56,7 @@ public class GoogleCloudConfig {
   public GoogleCloudConfig(GoogleCloudConfig config) {
     this();
     serviceAccountKeyFile = config.serviceAccountKeyFile;
+    scanFoldersForLastModifDate = config.scanFoldersForLastModifDate;
     maxAttempts = config.maxAttempts;
     initialRetryDelay = config.initialRetryDelay;
     retryDelayMultiplier = config.retryDelayMultiplier;
diff --git 
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
 
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
index 5780006a3a..4f423b42e9 100644
--- 
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
+++ 
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
@@ -35,6 +35,7 @@ import 
org.apache.hop.ui.core.gui.IGuiPluginCompositeWidgetsListener;
 import org.apache.hop.ui.core.widget.TextVar;
 import org.apache.hop.ui.hopgui.HopGui;
 import 
org.apache.hop.ui.hopgui.perspective.configuration.tabs.ConfigPluginOptionsTab;
+import org.eclipse.swt.widgets.Button;
 import org.eclipse.swt.widgets.Control;
 import picocli.CommandLine;
 
@@ -51,6 +52,8 @@ public class GoogleCloudConfigPlugin implements 
IConfigOptions, IGuiPluginCompos
 
   private static final String WIDGET_ID_GOOGLE_CLOUD_SERVICE_ACCOUNT_KEY_FILE =
       "10000-google-cloud-service-account-key-file";
+  private static final String 
WIDGET_ID_GOOGLE_CLOUD_SERVICE_SCAN_FOLDERS_FOR_MODIF_DATE =
+      "10010-google-cloud-service-scan-folders-for-modification-date";
   private static final String WIDGET_ID_GOOGLE_CLOUD_SERVICE_MAX_ATTEMPTS =
       "10100-google-cloud-service-max-attempts";
   private static final String 
WIDGET_ID_GOOGLE_CLOUD_SERVICE_INITIAL_RETRY_DELAY =
@@ -84,6 +87,15 @@ public class GoogleCloudConfigPlugin implements 
IConfigOptions, IGuiPluginCompos
       description = "Configure the path to a Google Cloud service account JSON 
key file")
   private String serviceAccountKeyFile;
 
+  @GuiWidgetElement(
+      id = WIDGET_ID_GOOGLE_CLOUD_SERVICE_SCAN_FOLDERS_FOR_MODIF_DATE,
+      parentId = ConfigPluginOptionsTab.GUI_WIDGETS_PARENT_ID,
+      type = GuiElementType.CHECKBOX,
+      variables = false,
+      label = 
"i18n::GoogleCloudPlugin.ScanFolderForLastModificationDate.Label",
+      toolTip = 
"i18n::GoogleCloudPlugin.ScanFolderForLastModificationDate.Description")
+  private Boolean scanFoldersForModificationDate;
+
   @GuiWidgetElement(
       id = WIDGET_ID_GOOGLE_CLOUD_SERVICE_MAX_ATTEMPTS,
       parentId = ConfigPluginOptionsTab.GUI_WIDGETS_PARENT_ID,
@@ -184,6 +196,7 @@ public class GoogleCloudConfigPlugin implements 
IConfigOptions, IGuiPluginCompos
 
     GoogleCloudConfig config = GoogleCloudConfigSingleton.getConfig();
     instance.serviceAccountKeyFile = config.getServiceAccountKeyFile();
+    instance.scanFoldersForModificationDate = 
config.getScanFoldersForLastModifDate();
     instance.maxAttempts = config.getMaxAttempts();
     instance.initialRetryDelay = config.getInitialRetryDelay();
     instance.retryDelayMultiplier = config.getRetryDelayMultiplier();
@@ -216,6 +229,14 @@ public class GoogleCloudConfigPlugin implements 
IConfigOptions, IGuiPluginCompos
         changed = true;
       }
 
+      if (scanFoldersForModificationDate != null
+          && scanFoldersForModificationDate.equals(Boolean.TRUE)) {
+        config.setScanFoldersForLastModifDate(scanFoldersForModificationDate);
+        log.logBasic(
+            "Google Cloud Storage service will scan folders for the last file 
modification time.");
+        changed = true;
+      }
+
       if (maxAttempts != null) {
         config.setMaxAttempts(maxAttempts);
         log.logBasic("Google Cloud service max attempts set to " + 
maxAttempts);
@@ -312,6 +333,11 @@ public class GoogleCloudConfigPlugin implements 
IConfigOptions, IGuiPluginCompos
           serviceAccountKeyFile = ((TextVar) control).getText();
           
GoogleCloudConfigSingleton.getConfig().setServiceAccountKeyFile(serviceAccountKeyFile);
           break;
+        case WIDGET_ID_GOOGLE_CLOUD_SERVICE_SCAN_FOLDERS_FOR_MODIF_DATE:
+          scanFoldersForModificationDate = ((Button) control).getSelection();
+          GoogleCloudConfigSingleton.getConfig()
+              .setScanFoldersForLastModifDate(scanFoldersForModificationDate);
+          break;
         case WIDGET_ID_GOOGLE_CLOUD_SERVICE_MAX_ATTEMPTS:
           maxAttempts = ((TextVar) control).getText();
           GoogleCloudConfigSingleton.getConfig().setMaxAttempts(maxAttempts);
diff --git 
a/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
 
b/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
index 99813db674..d0bec7be27 100644
--- 
a/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
+++ 
b/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
@@ -39,4 +39,6 @@ GoogleCloudPlugin.MaxRpcTimeout.Description=Max RPC Timeout
 GoogleCloudPlugin.ConnectTimeout.Label=Connect Timeout
 GoogleCloudPlugin.ConnectTimeout.Description=Connect Timeout
 GoogleCloudPlugin.ReadTimeout.Label=Read Timeout
-GoogleCloudPlugin.ReadTimeout.Description=Read Timeout
\ No newline at end of file
+GoogleCloudPlugin.ReadTimeout.Description=Read Timeout
+GoogleCloudPlugin.ScanFolderForLastModificationDate.Label=Scan folders to find 
last modified date
+GoogleCloudPlugin.ScanFolderForLastModificationDate.Description=Scans all 
files in a folder and uses the most recent modification date as the folder 
modification date. This may have a performance impact in buckets with lots of 
files.
\ No newline at end of file

Reply via email to