This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new 2d94d26988 Add caching to speed up file listing, fixes #4065 (#6652)
2d94d26988 is described below
commit 2d94d269883a2fcf65b60e6040266005f3ded2b1
Author: Hans Van Akelyen <[email protected]>
AuthorDate: Wed Feb 25 17:32:26 2026 +0100
Add caching to speed up file listing, fixes #4065 (#6652)
---
.../org/apache/hop/vfs/s3/metadata/S3AuthType.java | 8 +-
.../org/apache/hop/vfs/s3/metadata/S3Meta.java | 5 +-
.../apache/hop/vfs/s3/metadata/S3MetaEditor.java | 10 +
.../apache/hop/vfs/s3/s3/vfs/S3FileProvider.java | 16 +-
.../s3/s3common/S3ClientWithAnonymousFallback.java | 78 ++++++
.../hop/vfs/s3/s3common/S3CommonFileObject.java | 49 +++-
.../hop/vfs/s3/s3common/S3CommonFileSystem.java | 73 ++++--
.../s3common/S3CommonFileSystemConfigBuilder.java | 10 +
.../vfs/s3/s3common/S3CommonPipedOutputStream.java | 1 +
.../s3/metadata/messages/messages_en_US.properties | 1 +
.../apache/hop/vfs/s3/vfs/S3FileProviderTest.java | 11 +
.../org/apache/hop/vfs/azure/AzureFileObject.java | 195 ++++++++++-----
.../apache/hop/vfs/azure/AzureFileProvider.java | 6 +
.../org/apache/hop/vfs/azure/AzureFileSystem.java | 31 +++
.../org/apache/hop/vfs/azure/AzureListCache.java | 121 +++++++++
.../vfs/azure/metadatatype/AzureMetadataType.java | 4 +
.../metadatatype/AzureMetadataTypeEditor.java | 24 ++
.../messages/messages_en_US.properties | 2 +
.../apache/hop/vfs/azure/AzureListCacheTest.java | 270 ++++++++++++++++++++
.../hop/vfs/googledrive/GoogleDriveFileObject.java | 6 +-
.../apache/hop/vfs/gs/GoogleStorageFileObject.java | 91 ++++++-
.../apache/hop/vfs/gs/GoogleStorageFileSystem.java | 31 +++
.../apache/hop/vfs/gs/GoogleStorageListCache.java | 120 +++++++++
.../hop/vfs/gs/config/GoogleCloudConfig.java | 5 +
.../hop/vfs/gs/config/GoogleCloudConfigPlugin.java | 22 ++
.../gs/config/messages/messages_en_US.properties | 2 +
.../hop/vfs/gs/GoogleStorageListCacheTest.java | 272 +++++++++++++++++++++
.../org/apache/hop/vfs/minio/MinioFileObject.java | 74 ++++--
.../apache/hop/vfs/minio/MinioFileProvider.java | 5 +
.../org/apache/hop/vfs/minio/MinioFileSystem.java | 31 +++
.../org/apache/hop/vfs/minio/MinioListCache.java | 120 +++++++++
.../apache/hop/vfs/minio/metadata/MinioMeta.java | 11 +
.../hop/vfs/minio/util/MinioPipedOutputStream.java | 2 +
.../metadata/messages/messages_en_US.properties | 2 +
.../apache/hop/vfs/minio/MinioFileSystemTest.java | 61 +++++
.../apache/hop/vfs/minio/MinioListCacheTest.java | 259 ++++++++++++++++++++
.../org/apache/hop/vfs/minio/MinioMetaTest.java | 19 ++
37 files changed, 1930 insertions(+), 118 deletions(-)
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3AuthType.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3AuthType.java
index cbd2a00c44..a4a50bcd2e 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3AuthType.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3AuthType.java
@@ -19,10 +19,14 @@ package org.apache.hop.vfs.s3.metadata;
/** S3 authentication method. */
public enum S3AuthType {
- /** Default credential chain (environment, instance profile, etc.) */
+ /**
+ * Default credential chain (environment, instance profile, etc.) with
anonymous fallback on 403
+ */
DEFAULT,
/** Explicit access key and secret (and optional session token) */
ACCESS_KEYS,
/** AWS credentials file with optional profile name */
- CREDENTIALS_FILE
+ CREDENTIALS_FILE,
+ /** Anonymous access only (for public buckets; no credentials) */
+ ANONYMOUS
}
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3Meta.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3Meta.java
index ca1428fd08..2b3fe399b0 100644
--- a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3Meta.java
+++ b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3Meta.java
@@ -64,7 +64,10 @@ public class S3Meta extends HopMetadataBase implements
Serializable, IHopMetadat
/** Cache TTL in seconds for list-result caching (avoids redundant
headObject calls). */
@HopMetadataProperty private String cacheTtlSeconds;
- /** Authentication type: {@link S3AuthType#name()} (Default, ACCESS_KEYS,
CREDENTIALS_FILE). */
+ /**
+ * Authentication type: {@link S3AuthType#name()} (DEFAULT, ACCESS_KEYS,
CREDENTIALS_FILE,
+ * ANONYMOUS).
+ */
@HopMetadataProperty private String authenticationType;
public S3Meta() {
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3MetaEditor.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3MetaEditor.java
index 92a19827d4..2cb459b688 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3MetaEditor.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/metadata/S3MetaEditor.java
@@ -217,6 +217,7 @@ public class S3MetaEditor extends MetadataEditor<S3Meta> {
BaseMessages.getString(PKG, "S3Meta.AuthType.Default"),
BaseMessages.getString(PKG, "S3Meta.AuthType.AccessKeys"),
BaseMessages.getString(PKG, "S3Meta.AuthType.CredentialsFile"),
+ BaseMessages.getString(PKG, "S3Meta.AuthType.Anonymous"),
});
FormData fdAuthType = new FormData();
fdAuthType.top = new FormAttachment(wlAuthType, 0, SWT.CENTER);
@@ -496,6 +497,8 @@ public class S3MetaEditor extends MetadataEditor<S3Meta> {
authIdx = 1;
} else if (S3AuthType.CREDENTIALS_FILE.name().equals(authType)) {
authIdx = 2;
+ } else if (S3AuthType.ANONYMOUS.name().equals(authType)) {
+ authIdx = 3;
}
wAuthType.select(authIdx);
@@ -532,6 +535,13 @@ public class S3MetaEditor extends MetadataEditor<S3Meta> {
meta.setSessionToken(null);
meta.setCredentialsFile(wCredentialsFile.getText());
meta.setProfileName(wProfileName.getText());
+ } else if (authIdx == 3) {
+ meta.setAuthenticationType(S3AuthType.ANONYMOUS.name());
+ meta.setAccessKey(null);
+ meta.setSecretKey(null);
+ meta.setSessionToken(null);
+ meta.setCredentialsFile(null);
+ meta.setProfileName(null);
} else {
meta.setAuthenticationType(S3AuthType.DEFAULT.name());
meta.setAccessKey(null);
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3/vfs/S3FileProvider.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3/vfs/S3FileProvider.java
index b35df18fa5..b195f5683f 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3/vfs/S3FileProvider.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3/vfs/S3FileProvider.java
@@ -56,8 +56,15 @@ public class S3FileProvider extends S3CommonFileProvider {
@Override
public FileSystem doCreateFileSystem(
final FileName name, final FileSystemOptions fileSystemOptions) {
- FileSystemOptions options =
- fileSystemOptions != null ? fileSystemOptions :
getDefaultFileSystemOptions();
+ // Named connections (s3test:// etc.) must use a dedicated options
instance so we never
+ // mutate the default options used by s3://. Otherwise using s3test://
would overwrite
+ // the default S3 configuration.
+ final FileSystemOptions options;
+ if (s3Meta != null && variables != null) {
+ options = new FileSystemOptions();
+ } else {
+ options = fileSystemOptions != null ? fileSystemOptions :
getDefaultFileSystemOptions();
+ }
if (s3Meta != null && variables != null) {
S3CommonFileSystemConfigBuilder config = new
S3CommonFileSystemConfigBuilder(options);
@@ -68,6 +75,8 @@ public class S3FileProvider extends S3CommonFileProvider {
authType = S3AuthType.ACCESS_KEYS.name();
} else if (StringUtils.isNotEmpty(s3Meta.getCredentialsFile())) {
authType = S3AuthType.CREDENTIALS_FILE.name();
+ } else if
(S3AuthType.ANONYMOUS.name().equals(s3Meta.getAuthenticationType())) {
+ authType = S3AuthType.ANONYMOUS.name();
} else {
authType = S3AuthType.DEFAULT.name();
}
@@ -108,13 +117,14 @@ public class S3FileProvider extends S3CommonFileProvider {
if (StringUtils.isNotEmpty(profileName)) {
config.setProfileName(profileName);
}
+ } else if (S3AuthType.ANONYMOUS.name().equals(authType)) {
+ config.setUseAnonymousAccess(true);
}
config.setPathStyleAccess(String.valueOf(s3Meta.isPathStyleAccess()));
String cacheTtl = variables.resolve(s3Meta.getCacheTtlSeconds());
if (StringUtils.isNotEmpty(cacheTtl)) {
config.setCacheTtlSeconds(cacheTtl);
}
- options = config.getFileSystemOptions();
}
return new S3FileSystem(name, options);
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3ClientWithAnonymousFallback.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3ClientWithAnonymousFallback.java
new file mode 100644
index 0000000000..005bf147aa
--- /dev/null
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3ClientWithAnonymousFallback.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hop.vfs.s3.s3common;
+
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.S3Exception;
+
+/**
+ * Wraps an S3 client so that when a call fails with 403 Access Denied, the
same call is retried
+ * using an anonymous (no-credentials) client. This allows using the default
credential chain (env,
+ * IAM, profile) while still succeeding for public buckets when those
credentials lack access.
+ */
+public final class S3ClientWithAnonymousFallback implements InvocationHandler {
+
+ private final S3Client primary;
+ private final S3Client anonymous;
+
+ private S3ClientWithAnonymousFallback(S3Client primary, S3Client anonymous) {
+ this.primary = primary;
+ this.anonymous = anonymous;
+ }
+
+ @Override
+ public Object invoke(Object proxy, Method method, Object[] args) throws
Throwable {
+ try {
+ return method.invoke(primary, args);
+ } catch (InvocationTargetException e) {
+ Throwable cause = e.getCause();
+ if (isAccessDenied(cause)) {
+ try {
+ return method.invoke(anonymous, args);
+ } catch (InvocationTargetException e2) {
+ throw e2.getCause() != null ? e2.getCause() : e2;
+ }
+ }
+ throw cause;
+ }
+ }
+
+ private static boolean isAccessDenied(Throwable t) {
+ if (!(t instanceof S3Exception)) {
+ return false;
+ }
+ Integer code = ((S3Exception) t).statusCode();
+ return code != null && code == 403;
+ }
+
+ /**
+ * Returns an S3Client that delegates to {@code primary} and on 403 retries
the same call with
+ * {@code anonymous}. Both clients must use the same region/endpoint
configuration.
+ */
+ public static S3Client create(S3Client primary, S3Client anonymous) {
+ return (S3Client)
+ Proxy.newProxyInstance(
+ S3Client.class.getClassLoader(),
+ new Class<?>[] {S3Client.class},
+ new S3ClientWithAnonymousFallback(primary, anonymous));
+ }
+}
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileObject.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileObject.java
index dab53fb61f..8ffeb7df33 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileObject.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileObject.java
@@ -287,7 +287,8 @@ public abstract class S3CommonFileObject extends
AbstractFileObject {
/**
* headObject failed with a non-404 error (e.g. 400 from cross-region). Use
listObjectsV2 (which
- * handles cross-region) to determine if the path is a file, folder, or
imaginary.
+ * handles cross-region) to determine if the path is a file, folder, or
imaginary. If list returns
+ * 403 (e.g. public bucket with GetObject but not ListBucket), try getObject
for the exact key.
*/
private void resolveTypeViaList(String key, String bucket, String
keyWithDelimiter)
throws FileSystemException {
@@ -323,8 +324,50 @@ public abstract class S3CommonFileObject extends
AbstractFileObject {
}
}
} catch (S3Exception listError) {
- LogChannel.GENERAL.logError("Could not get information on " +
getQualifiedName(), listError);
- throw new FileSystemException("vfs.provider/get-type.error",
getQualifiedName(), listError);
+ if (isAccessDenied(listError)) {
+ resolveTypeViaGetObject(key, bucket);
+ } else {
+ LogChannel.GENERAL.logError(
+ "Could not get information on " + getQualifiedName(), listError);
+ throw new FileSystemException("vfs.provider/get-type.error",
getQualifiedName(), listError);
+ }
+ }
+ }
+
+ private static boolean isAccessDenied(S3Exception e) {
+ Integer code = e.statusCode();
+ return code != null && code == 403;
+ }
+
+ /**
+ * List returned 403 (e.g. public bucket that allows GetObject but not
ListBucket). Try getObject
+ * for the exact key; if it succeeds, treat as file.
+ */
+ private void resolveTypeViaGetObject(String key, String bucket) throws
FileSystemException {
+ try {
+ ResponseInputStream<GetObjectResponse> stream =
+ fileSystem
+ .getS3Client()
+
.getObject(GetObjectRequest.builder().bucket(bucket).key(key).build());
+ try {
+ GetObjectResponse r = stream.response();
+ contentLength = r.contentLength();
+ lastModified = r.lastModified();
+ injectType(getName().getType());
+ } finally {
+ stream.close();
+ }
+ } catch (S3Exception e) {
+ LogChannel.GENERAL.logError("Could not get information on " +
getQualifiedName(), e);
+ throw new FileSystemException("vfs.provider/get-type.error",
getQualifiedName(), e);
+ } catch (IOException e) {
+ throw new FileSystemException("vfs.provider/get-type.error",
getQualifiedName(), e);
+ } finally {
+ try {
+ closeS3Object();
+ } catch (IOException e) {
+ LogChannel.GENERAL.logDebug("Error closing S3 object", e);
+ }
}
}
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystem.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystem.java
index 0ee9944be0..ae3feaee2b 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystem.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystem.java
@@ -27,6 +27,7 @@ import org.apache.commons.vfs2.provider.AbstractFileName;
import org.apache.commons.vfs2.provider.AbstractFileSystem;
import org.apache.hop.core.logging.LogChannel;
import org.apache.hop.vfs.s3.amazon.s3.S3Util;
+import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials;
@@ -117,32 +118,76 @@ public abstract class S3CommonFileSystem extends
AbstractFileSystem {
}
boolean crossRegion = S3Util.isEmpty(region);
- S3ClientBuilder builder =
- S3Client.builder()
- .crossRegionAccessEnabled(crossRegion)
- .region(awsRegion)
- .credentialsProvider(
- credentialsProvider != null
- ? credentialsProvider
- : DefaultCredentialsProvider.create());
-
- if (!S3Util.isEmpty(endpoint)) {
-
builder.endpointOverride(URI.create(endpoint)).forcePathStyle(pathStyle);
- }
+ if (cfg.getUseAnonymousAccess()) {
+ // Named connection with "Anonymous access": single client, no
credentials
+ S3ClientBuilder anonymousBuilder =
+ S3Client.builder()
+ .crossRegionAccessEnabled(crossRegion)
+ .region(awsRegion)
+ .credentialsProvider(AnonymousCredentialsProvider.create());
+ if (!S3Util.isEmpty(endpoint)) {
+
anonymousBuilder.endpointOverride(URI.create(endpoint)).forcePathStyle(pathStyle);
+ }
+ client = anonymousBuilder.build();
+ } else if (credentialsProvider != null) {
+ // Explicit credentials: single client
+ S3ClientBuilder builder =
+ S3Client.builder()
+ .crossRegionAccessEnabled(crossRegion)
+ .region(awsRegion)
+ .credentialsProvider(credentialsProvider);
+
+ if (!S3Util.isEmpty(endpoint)) {
+
builder.endpointOverride(URI.create(endpoint)).forcePathStyle(pathStyle);
+ }
+
+ client = builder.build();
+ } else {
+ // Default client: use default credential chain first, fall back to
anonymous on 403.
+ // This keeps the behavior that worked in the past (public buckets
work without
+ // credentials).
+ S3ClientBuilder baseBuilder =
+
S3Client.builder().crossRegionAccessEnabled(crossRegion).region(awsRegion);
+
+ if (!S3Util.isEmpty(endpoint)) {
+
baseBuilder.endpointOverride(URI.create(endpoint)).forcePathStyle(pathStyle);
+ }
- client = builder.build();
+ S3Client primaryClient =
+
baseBuilder.credentialsProvider(DefaultCredentialsProvider.create()).build();
+
+ S3ClientBuilder anonymousBuilder =
+ S3Client.builder()
+ .crossRegionAccessEnabled(crossRegion)
+ .region(awsRegion)
+ .credentialsProvider(AnonymousCredentialsProvider.create());
+ if (!S3Util.isEmpty(endpoint)) {
+
anonymousBuilder.endpointOverride(URI.create(endpoint)).forcePathStyle(pathStyle);
+ }
+ S3Client anonymousClient = anonymousBuilder.build();
+
+ client = S3ClientWithAnonymousFallback.create(primaryClient,
anonymousClient);
+ }
}
if (client == null || hasClientChangedCredentials()) {
try {
+ // No options (e.g. plain s3://): default chain with anonymous
fallback on 403
String regionStr = System.getenv(S3Util.AWS_REGION);
Region region = regionStr != null ? Region.of(regionStr) :
Region.US_EAST_1;
boolean crossRegion = S3Util.isEmpty(regionStr);
- client =
+ S3Client primaryClient =
S3Client.builder()
.crossRegionAccessEnabled(crossRegion)
.region(region)
.credentialsProvider(DefaultCredentialsProvider.create())
.build();
+ S3Client anonymousClient =
+ S3Client.builder()
+ .crossRegionAccessEnabled(crossRegion)
+ .region(region)
+ .credentialsProvider(AnonymousCredentialsProvider.create())
+ .build();
+ client = S3ClientWithAnonymousFallback.create(primaryClient,
anonymousClient);
awsAccessKeyCache =
System.getProperty(S3Util.ACCESS_KEY_SYSTEM_PROPERTY);
awsSecretKeyCache =
System.getProperty(S3Util.SECRET_KEY_SYSTEM_PROPERTY);
} catch (Throwable t) {
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystemConfigBuilder.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystemConfigBuilder.java
index ddaa6dfc6f..39296cf32b 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystemConfigBuilder.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonFileSystemConfigBuilder.java
@@ -36,6 +36,7 @@ public class S3CommonFileSystemConfigBuilder extends
FileSystemConfigBuilder {
private static final String ENDPOINT = "endpoint";
private static final String PATH_STYLE_ACCESS = "pathSyleAccess";
private static final String CACHE_TTL_SECONDS = "cacheTtlSeconds";
+ private static final String USE_ANONYMOUS_ACCESS = "useAnonymousAccess";
@Getter @Setter private FileSystemOptions fileSystemOptions;
@@ -115,6 +116,15 @@ public class S3CommonFileSystemConfigBuilder extends
FileSystemConfigBuilder {
return (String) this.getParam(getFileSystemOptions(), CACHE_TTL_SECONDS);
}
+ public void setUseAnonymousAccess(boolean useAnonymousAccess) {
+ this.setParam(getFileSystemOptions(), USE_ANONYMOUS_ACCESS,
useAnonymousAccess);
+ }
+
+ public boolean getUseAnonymousAccess() {
+ Boolean value = (Boolean) this.getParam(getFileSystemOptions(),
USE_ANONYMOUS_ACCESS);
+ return Boolean.TRUE.equals(value);
+ }
+
@Override
protected Class<? extends FileSystem> getConfigClass() {
return S3FileSystem.class;
diff --git
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonPipedOutputStream.java
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonPipedOutputStream.java
index db1f66b969..e62f195613 100644
---
a/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonPipedOutputStream.java
+++
b/plugins/tech/aws/src/main/java/org/apache/hop/vfs/s3/s3common/S3CommonPipedOutputStream.java
@@ -103,6 +103,7 @@ public class S3CommonPipedOutputStream extends
PipedOutputStream {
@Override
public void close() throws IOException {
+ initializeWrite();
super.close();
if (initialized && isBlockedUntilDone()) {
while (!result.isDone()) {
diff --git
a/plugins/tech/aws/src/main/resources/org/apache/hop/vfs/s3/metadata/messages/messages_en_US.properties
b/plugins/tech/aws/src/main/resources/org/apache/hop/vfs/s3/metadata/messages/messages_en_US.properties
index 80328ea054..b46d20119d 100644
---
a/plugins/tech/aws/src/main/resources/org/apache/hop/vfs/s3/metadata/messages/messages_en_US.properties
+++
b/plugins/tech/aws/src/main/resources/org/apache/hop/vfs/s3/metadata/messages/messages_en_US.properties
@@ -45,3 +45,4 @@ S3Meta.AuthType.Label = Authentication
S3Meta.AuthType.Default = Default (environment / instance profile)
S3Meta.AuthType.AccessKeys = Access key and secret
S3Meta.AuthType.CredentialsFile = Credentials file / profile
+S3Meta.AuthType.Anonymous = Anonymous (public buckets only)
diff --git
a/plugins/tech/aws/src/test/java/org/apache/hop/vfs/s3/vfs/S3FileProviderTest.java
b/plugins/tech/aws/src/test/java/org/apache/hop/vfs/s3/vfs/S3FileProviderTest.java
index 4dfad3960e..c099ce27b5 100644
---
a/plugins/tech/aws/src/test/java/org/apache/hop/vfs/s3/vfs/S3FileProviderTest.java
+++
b/plugins/tech/aws/src/test/java/org/apache/hop/vfs/s3/vfs/S3FileProviderTest.java
@@ -167,4 +167,15 @@ class S3FileProviderTest {
FileSystem fs = namedProvider.doCreateFileSystem(fileName, null);
assertNotNull(fs);
}
+
+ @Test
+ void testDoCreateFileSystemWithAnonymousAuth() {
+ S3Meta meta = new S3Meta();
+ meta.setAuthenticationType(S3AuthType.ANONYMOUS.name());
+ meta.setRegion("us-east-1");
+
+ S3FileProvider namedProvider = new S3FileProvider(variables, meta);
+ FileSystem fs = namedProvider.doCreateFileSystem(fileName, null);
+ assertNotNull(fs);
+ }
}
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
index 7532546074..ed0c17a18c 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
@@ -32,8 +32,11 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.vfs2.FileObject;
@@ -155,16 +158,27 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
type = FileType.FOLDER;
ListPathsOptions rootLpo = new ListPathsOptions();
rootLpo.setRecursive(false);
+ Map<String, AzureListCache.ChildInfo> cacheEntries = new
LinkedHashMap<>();
fileSystemClient
.listPaths(rootLpo, null)
.forEach(
pi -> {
String childName = pi.getName();
- // Only add non-empty, valid child names
if (!childName.isEmpty() && !childName.equals(".") &&
!childName.equals("..")) {
children.add(childName);
+ cacheEntries.put(
+ childName,
+ new AzureListCache.ChildInfo(
+ Boolean.TRUE.equals(pi.isDirectory()) ?
FileType.FOLDER : FileType.FILE,
+ pi.getContentLength(),
+ pi.getLastModified() != null
+ ? pi.getLastModified().toInstant()
+ : Instant.EPOCH));
}
});
+ if (!cacheEntries.isEmpty()) {
+ getAbstractFileSystem().putListCache(containerName, "",
cacheEntries);
+ }
} else {
type = FileType.IMAGINARY;
throw new HopException("Container does not exist: " + fullPath);
@@ -176,80 +190,124 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
type = FileType.FOLDER;
ListPathsOptions rootLpo = new ListPathsOptions();
rootLpo.setRecursive(false);
+ Map<String, AzureListCache.ChildInfo> cacheEntries = new
LinkedHashMap<>();
fileSystemClient
.listPaths(rootLpo, null)
.forEach(
pi -> {
String childName = pi.getName();
- // Only add non-empty, valid child names
if (!childName.isEmpty() && !childName.equals(".") &&
!childName.equals("..")) {
children.add(childName);
+ cacheEntries.put(
+ childName,
+ new AzureListCache.ChildInfo(
+ Boolean.TRUE.equals(pi.isDirectory()) ?
FileType.FOLDER : FileType.FILE,
+ pi.getContentLength(),
+ pi.getLastModified() != null
+ ? pi.getLastModified().toInstant()
+ : Instant.EPOCH));
}
});
+ if (!cacheEntries.isEmpty()) {
+ getAbstractFileSystem().putListCache(containerName, "",
cacheEntries);
+ }
} else {
lpo.setPath(currentFilePath);
- DataLakeDirectoryClient directoryClient =
- fileSystemClient.getDirectoryClient(currentFilePath);
- final Boolean exists = directoryClient.exists();
-
- final Boolean isDirectory =
- exists
- && fileSystemClient
- .getDirectoryClient(currentFilePath)
- .getProperties()
- .isDirectory();
- final Boolean isFile = !isDirectory;
- if (exists && isDirectory) {
- children = new ArrayList<>();
- lpo.setRecursive(false); // Only get immediate children, not
recursive
- PagedIterable<PathItem> pathItems = fileSystemClient.listPaths(lpo,
null);
-
- // Normalize the current path for comparison
- final String normalizedCurrentPath;
- String tempPath = StringUtils.removeStart(currentFilePath, "/");
- if (!tempPath.isEmpty() && !tempPath.endsWith("/")) {
- normalizedCurrentPath = tempPath + "/";
- } else {
- normalizedCurrentPath = tempPath;
- }
- pathItems.forEach(
- item -> {
- String itemName = item.getName();
- String childName;
-
- // Remove the current directory prefix to get the child name
- if (!normalizedCurrentPath.isEmpty()
- && itemName.startsWith(normalizedCurrentPath)) {
- childName =
itemName.substring(normalizedCurrentPath.length());
- } else {
- childName = itemName;
- }
+ String strippedPath = StringUtils.removeStart(currentFilePath, "/");
+ String parentPrefix = AzureListCache.parentPrefix(strippedPath);
+ AzureListCache.ChildInfo cached =
+ getAbstractFileSystem().getFromListCache(containerName,
parentPrefix, strippedPath);
- // Remove any leading slashes
- childName = StringUtils.removeStart(childName, "/");
+ if (cached != null && cached.type == FileType.FILE) {
+ type = FileType.FILE;
+ size = cached.size;
+ lastModified = cached.lastModified != null ?
cached.lastModified.toEpochMilli() : 0;
+ dataLakeFileClient = fileSystemClient.getFileClient(currentFilePath);
+ attached = true;
+ return;
+ }
- // Only add non-empty, valid child names
- if (!childName.isEmpty() && !childName.equals(".") &&
!childName.equals("..")) {
- children.add(childName);
- }
- });
- size = children.size();
+ boolean knownFolder = cached != null && cached.type == FileType.FOLDER;
+
+ if (knownFolder) {
+ type = FileType.FOLDER;
+ lastModified = cached.lastModified != null ?
cached.lastModified.toEpochMilli() : 0;
+ }
+
+ if (!knownFolder) {
+ DataLakeDirectoryClient directoryClient =
+ fileSystemClient.getDirectoryClient(currentFilePath);
+ final Boolean exists = directoryClient.exists();
+
+ final Boolean isDirectory =
+ exists
+ && fileSystemClient
+ .getDirectoryClient(currentFilePath)
+ .getProperties()
+ .isDirectory();
+ final Boolean isFile = !isDirectory;
+ if (exists && isFile) {
+ dataLakeFileClient =
fileSystemClient.getFileClient(currentFilePath);
+ size = dataLakeFileClient.getProperties().getFileSize();
+ type = FileType.FILE;
+ lastModified =
+
dataLakeFileClient.getProperties().getLastModified().toEpochSecond() * 1000L;
+ return;
+ } else if (!exists) {
+ lastModified = 0;
+ type = FileType.IMAGINARY;
+ size = 0;
+ pathItem = null;
+ dirPathItem = null;
+ return;
+ }
type = FileType.FOLDER;
lastModified =
directoryClient.getProperties().getLastModified().toEpochSecond() * 1000L;
- } else if (exists && isFile) {
- dataLakeFileClient = fileSystemClient.getFileClient(currentFilePath);
- size = dataLakeFileClient.getProperties().getFileSize();
- type = FileType.FILE;
- lastModified =
-
dataLakeFileClient.getProperties().getLastModified().toEpochSecond() * 1000L;
+ }
+
+ children = new ArrayList<>();
+ lpo.setRecursive(false);
+ PagedIterable<PathItem> pathItems = fileSystemClient.listPaths(lpo,
null);
+
+ final String normalizedCurrentPath;
+ String tempPath = StringUtils.removeStart(currentFilePath, "/");
+ if (!tempPath.isEmpty() && !tempPath.endsWith("/")) {
+ normalizedCurrentPath = tempPath + "/";
} else {
- lastModified = 0;
- type = FileType.IMAGINARY;
- size = 0;
- pathItem = null;
- dirPathItem = null;
+ normalizedCurrentPath = tempPath;
+ }
+
+ Map<String, AzureListCache.ChildInfo> cacheEntries = new
LinkedHashMap<>();
+ pathItems.forEach(
+ item -> {
+ String itemName = item.getName();
+ String childName;
+
+ if (!normalizedCurrentPath.isEmpty() &&
itemName.startsWith(normalizedCurrentPath)) {
+ childName = itemName.substring(normalizedCurrentPath.length());
+ } else {
+ childName = itemName;
+ }
+
+ childName = StringUtils.removeStart(childName, "/");
+
+ if (!childName.isEmpty() && !childName.equals(".") &&
!childName.equals("..")) {
+ children.add(childName);
+ cacheEntries.put(
+ itemName,
+ new AzureListCache.ChildInfo(
+ Boolean.TRUE.equals(item.isDirectory()) ?
FileType.FOLDER : FileType.FILE,
+ item.getContentLength(),
+ item.getLastModified() != null
+ ? item.getLastModified().toInstant()
+ : Instant.EPOCH));
+ }
+ });
+ if (!cacheEntries.isEmpty()) {
+ getAbstractFileSystem().putListCache(containerName,
normalizedCurrentPath, cacheEntries);
}
+ size = children.size();
}
}
}
@@ -364,6 +422,11 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
children = null;
size = 0;
lastModified = 0;
+ if (containerName != null && currentFilePath != null) {
+ getAbstractFileSystem()
+ .invalidateListCacheForParentOf(
+ containerName, StringUtils.removeStart(currentFilePath,
"/"));
+ }
}
}
}
@@ -388,9 +451,12 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
// Start the copy operation
fileClient.rename(
containerName, ((AzureFileName)
newfile.getName()).getPathAfterContainer().substring(1));
- // newBlob.startCopy(cloudBlob.getUri());
- // Delete the original blob
- // doDelete();
+ getAbstractFileSystem()
+ .invalidateListCacheForParentOf(
+ containerName, StringUtils.removeStart(currentFilePath, "/"));
+ String newPath = ((AzureFileName)
newfile.getName()).getPathAfterContainer();
+ getAbstractFileSystem()
+ .invalidateListCacheForParentOf(containerName,
StringUtils.removeStart(newPath, "/"));
} else {
throw new FileSystemException("Renaming of directories not supported on
this file.");
}
@@ -398,8 +464,12 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
@Override
protected void doCreateFolder() {
- // create a folder, we already know the path
service.getFileSystemClient(containerName).createDirectory(currentFilePath.substring(1));
+ if (containerName != null && currentFilePath != null) {
+ getAbstractFileSystem()
+ .invalidateListCacheForParentOf(
+ containerName, StringUtils.removeStart(currentFilePath, "/"));
+ }
}
@Override
@@ -419,6 +489,9 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
throw new UnsupportedOperationException();
}
type = FileType.FILE;
+ getAbstractFileSystem()
+ .invalidateListCacheForParentOf(
+ containerName, StringUtils.removeStart(currentFilePath, "/"));
return new BlockBlobOutputStream(dataLakeFileClient.getOutputStream());
} else {
throw new UnsupportedOperationException();
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
index 4a61d05fbd..72c04edf2b 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
@@ -205,6 +205,12 @@ public class AzureFileProvider extends
AbstractOriginatingFileProvider {
fileSystemOptions,
account);
+ if (azureMetadataType != null) {
+ String cacheTtlSeconds =
variables.resolve(azureMetadataType.getCacheTtlSeconds());
+ long ttlMs = org.apache.hop.core.Const.toLong(cacheTtlSeconds, 10L) *
1000L;
+ azureFileSystem.setListCacheTtlMs(ttlMs);
+ }
+
} finally {
UserAuthenticatorUtils.cleanup(authData);
}
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
index 49d6662f01..fdb32e76c2 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
@@ -32,6 +32,8 @@ public class AzureFileSystem extends AbstractFileSystem {
private final DataLakeServiceClient serviceClient;
private final String fsName;
private final String account;
+ private long listCacheTtlMs = AzureListCache.DEFAULT_TTL_MS;
+ private AzureListCache listCache;
public AzureFileSystem(
AzureFileName fileName,
@@ -46,6 +48,17 @@ public class AzureFileSystem extends AbstractFileSystem {
this.account = account;
}
+ public void setListCacheTtlMs(long listCacheTtlMs) {
+ this.listCacheTtlMs = listCacheTtlMs;
+ }
+
+ private AzureListCache getListCache() {
+ if (listCache == null) {
+ listCache = new AzureListCache(listCacheTtlMs);
+ }
+ return listCache;
+ }
+
@Override
protected void addCapabilities(Collection<Capability> capabilities) {
capabilities.addAll(AzureFileProvider.capabilities);
@@ -63,4 +76,22 @@ public class AzureFileSystem extends AbstractFileSystem {
public String getAccount() {
return account;
}
+
+ void putListCache(
+ String container, String prefix, java.util.Map<String,
AzureListCache.ChildInfo> entries) {
+ getListCache().put(container, prefix, entries);
+ }
+
+ AzureListCache.ChildInfo getFromListCache(
+ String container, String parentPrefix, String childPath) {
+ return getListCache().get(container, parentPrefix, childPath);
+ }
+
+ void invalidateListCache(String container, String prefix) {
+ getListCache().invalidate(container, prefix);
+ }
+
+ void invalidateListCacheForParentOf(String container, String path) {
+ getListCache().invalidateParentOf(container, path);
+ }
}
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureListCache.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureListCache.java
new file mode 100644
index 0000000000..14e01b519d
--- /dev/null
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureListCache.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hop.vfs.azure;
+
+import java.time.Instant;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.vfs2.FileType;
+
+/**
+ * Cache for Azure list results. When a directory is listed via listPaths, we
store the child paths
+ * and their types/sizes so that later doAttach() calls for those children can
be answered without
+ * extra API calls.
+ */
+public class AzureListCache {
+
+ public static final long DEFAULT_TTL_MS = 10_000L;
+
+ public static final class ChildInfo {
+ public final FileType type;
+ public final long size;
+ public final Instant lastModified;
+
+ public ChildInfo(FileType type, long size, Instant lastModified) {
+ this.type = type;
+ this.size = size;
+ this.lastModified = lastModified;
+ }
+ }
+
+ private final long ttlMs;
+ private final ConcurrentHashMap<String, CachedList> cache = new
ConcurrentHashMap<>();
+
+ public AzureListCache() {
+ this(DEFAULT_TTL_MS);
+ }
+
+ public AzureListCache(long ttlMs) {
+ this.ttlMs = ttlMs;
+ }
+
+ private static String cacheKey(String container, String prefix) {
+ return container + "|" + prefix;
+ }
+
+ private static class CachedList {
+ final Map<String, ChildInfo> entries;
+ final long expiryMillis;
+
+ CachedList(Map<String, ChildInfo> entries, long expiryMillis) {
+ this.entries = new ConcurrentHashMap<>(entries);
+ this.expiryMillis = expiryMillis;
+ }
+ }
+
+ /** Store list result for the given container and prefix. */
+ public void put(String container, String prefix, Map<String, ChildInfo>
childEntries) {
+ if (childEntries == null || childEntries.isEmpty()) {
+ return;
+ }
+ String key = cacheKey(container, prefix);
+ cache.put(key, new CachedList(childEntries, System.currentTimeMillis() +
ttlMs));
+ }
+
+ /**
+ * Look up cached type/size/lastModified for a child. Returns null if not in
cache or cache
+ * expired.
+ */
+ public ChildInfo get(String container, String parentPrefix, String
childPath) {
+ String key = cacheKey(container, parentPrefix);
+ CachedList cached = cache.get(key);
+ if (cached == null) {
+ return null;
+ }
+ if (System.currentTimeMillis() > cached.expiryMillis) {
+ cache.remove(key);
+ return null;
+ }
+ return cached.entries.get(childPath);
+ }
+
+ /** Invalidate the list cache for the given container and prefix. */
+ public void invalidate(String container, String prefix) {
+ cache.remove(cacheKey(container, prefix));
+ }
+
+ /**
+ * Invalidate the list cache for the parent directory of the given path.
Call after put/delete so
+ * the next list reflects the change.
+ */
+ public void invalidateParentOf(String container, String path) {
+ String parentPrefix = parentPrefix(path);
+ invalidate(container, parentPrefix);
+ }
+
+ /** Compute the parent prefix: returns "path/to/" for "path/to/file", and ""
for a top-level name or an empty/null path. */
+ public static String parentPrefix(String path) {
+ if (path == null || path.isEmpty()) {
+ return "";
+ }
+ String stripped = path.startsWith("/") ? path.substring(1) : path;
+ stripped = stripped.endsWith("/") ? stripped.substring(0,
stripped.length() - 1) : stripped;
+ int last = stripped.lastIndexOf('/');
+ return last >= 0 ? stripped.substring(0, last + 1) : "";
+ }
+}
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataType.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataType.java
index e430001029..5ad39f001d 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataType.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataType.java
@@ -46,7 +46,11 @@ public class AzureMetadataType extends HopMetadataBase
implements Serializable,
@HopMetadataProperty private String storageAccountEndpoint;
+ /** Cache TTL in seconds for list-result caching (same as S3/MinIO). */
+ @HopMetadataProperty private String cacheTtlSeconds;
+
public AzureMetadataType() {
this.authenticationType = "Key"; // Default to Key authentication
+ this.cacheTtlSeconds = "5";
}
}
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataTypeEditor.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataTypeEditor.java
index bc571b6d7d..f7e5867ce1 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataTypeEditor.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/metadatatype/AzureMetadataTypeEditor.java
@@ -48,6 +48,7 @@ public class AzureMetadataTypeEditor extends
MetadataEditor<AzureMetadataType> {
private Label wlStorageAccountKey;
private PasswordTextVar wStorageAccountKey;
private TextVar wStorageAccountEndpoint;
+ private TextVar wCacheTtlSeconds;
public AzureMetadataTypeEditor(
HopGui hopGui, MetadataManager<AzureMetadataType> manager,
AzureMetadataType metadata) {
@@ -184,6 +185,26 @@ public class AzureMetadataTypeEditor extends
MetadataEditor<AzureMetadataType> {
wStorageAccountKey.setLayoutData(fdStorageAccountKey);
lastControl = wStorageAccountKey;
+ // Cache TTL (seconds)
+ //
+ Label wlCacheTtlSeconds = new Label(parent, SWT.RIGHT);
+ PropsUi.setLook(wlCacheTtlSeconds);
+ wlCacheTtlSeconds.setText(
+ BaseMessages.getString(PKG,
"AzureMetadataTypeEditor.CacheTtlSeconds.Label"));
+ FormData fdlCacheTtlSeconds = new FormData();
+ fdlCacheTtlSeconds.top = new FormAttachment(lastControl, margin);
+ fdlCacheTtlSeconds.left = new FormAttachment(0, 0);
+ fdlCacheTtlSeconds.right = new FormAttachment(middle, -margin);
+ wlCacheTtlSeconds.setLayoutData(fdlCacheTtlSeconds);
+ wCacheTtlSeconds = new TextVar(getVariables(), parent, SWT.SINGLE |
SWT.LEFT | SWT.BORDER);
+ PropsUi.setLook(wCacheTtlSeconds);
+ FormData fdCacheTtlSeconds = new FormData();
+ fdCacheTtlSeconds.top = new FormAttachment(wlCacheTtlSeconds, 0,
SWT.CENTER);
+ fdCacheTtlSeconds.left = new FormAttachment(middle, 0);
+ fdCacheTtlSeconds.right = new FormAttachment(95, 0);
+ wCacheTtlSeconds.setLayoutData(fdCacheTtlSeconds);
+ lastControl = wCacheTtlSeconds;
+
// Add listener to authentication type to show/hide storage account key
wAuthenticationType.addModifyListener(
e -> {
@@ -203,6 +224,7 @@ public class AzureMetadataTypeEditor extends
MetadataEditor<AzureMetadataType> {
wAuthenticationType.addModifyListener(e -> setChanged());
wStorageAccountKey.addModifyListener(e -> setChanged());
wStorageAccountEndpoint.addModifyListener(e -> setChanged());
+ wCacheTtlSeconds.addModifyListener(e -> setChanged());
}
@Override
@@ -214,6 +236,7 @@ public class AzureMetadataTypeEditor extends
MetadataEditor<AzureMetadataType> {
wAuthenticationType.setText(Const.NVL(azureMetadataType.getAuthenticationType(),
"Key"));
wStorageAccountKey.setText(Const.NVL(azureMetadataType.getStorageAccountKey(),
""));
wStorageAccountEndpoint.setText(Const.NVL(azureMetadataType.getStorageAccountEndpoint(),
""));
+ wCacheTtlSeconds.setText(Const.NVL(azureMetadataType.getCacheTtlSeconds(),
"5"));
// Show/hide storage account key based on authentication type
String authType = wAuthenticationType.getText();
@@ -230,6 +253,7 @@ public class AzureMetadataTypeEditor extends
MetadataEditor<AzureMetadataType> {
azureMetadataType.setAuthenticationType(wAuthenticationType.getText());
azureMetadataType.setStorageAccountKey(wStorageAccountKey.getText());
azureMetadataType.setStorageAccountEndpoint(wStorageAccountEndpoint.getText());
+ azureMetadataType.setCacheTtlSeconds(wCacheTtlSeconds.getText());
}
@Override
diff --git
a/plugins/tech/azure/src/main/resources/org/apache/hop/vfs/azure/metadatatype/messages/messages_en_US.properties
b/plugins/tech/azure/src/main/resources/org/apache/hop/vfs/azure/metadatatype/messages/messages_en_US.properties
index d60c5d6407..44fc1bb5a5 100644
---
a/plugins/tech/azure/src/main/resources/org/apache/hop/vfs/azure/metadatatype/messages/messages_en_US.properties
+++
b/plugins/tech/azure/src/main/resources/org/apache/hop/vfs/azure/metadatatype/messages/messages_en_US.properties
@@ -24,3 +24,5 @@ AzureMetadataTypeEditor.StorageAccountName.Label=Storage
Account Name
AzureMetadataTypeEditor.AuthenticationType.Label=Authentication Type
AzureMetadataTypeEditor.StorageAccountKey.Label=Storage Account Key
AzureMetadataTypeEditor.StorageAccountEndpoint.Label=Storage Endpoint
+AzureMetadataTypeEditor.CacheTtlSeconds.Label=Cache TTL (seconds)
+AzureMetadataTypeEditor.CacheTtlSeconds.Description=How long to cache folder
listing results, in seconds. New connections default to 5; if the value is blank or not a valid number, a 10-second fallback is used.
diff --git
a/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureListCacheTest.java
b/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureListCacheTest.java
new file mode 100644
index 0000000000..5cd2ab7665
--- /dev/null
+++
b/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureListCacheTest.java
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.vfs.azure;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import java.time.Instant;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.commons.vfs2.FileType;
+import org.junit.jupiter.api.Test;
+
+class AzureListCacheTest {
+
+ @Test
+ void testDefaultTtl() {
+ assertEquals(10_000L, AzureListCache.DEFAULT_TTL_MS);
+ }
+
+ @Test
+ void testDefaultConstructorUsesTtl() {
+ AzureListCache cache = new AzureListCache();
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new AzureListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("container", "prefix/", entries);
+ assertNotNull(cache.get("container", "prefix/", "key"));
+ }
+
+ @Test
+ void testPutAndGet() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put(
+ "path/to/file.txt", new AzureListCache.ChildInfo(FileType.FILE, 1024,
Instant.now()));
+ entries.put("path/to/sub/", new AzureListCache.ChildInfo(FileType.FOLDER,
0, Instant.EPOCH));
+
+ cache.put("mycontainer", "path/to/", entries);
+
+ AzureListCache.ChildInfo fileInfo = cache.get("mycontainer", "path/to/",
"path/to/file.txt");
+ assertNotNull(fileInfo);
+ assertEquals(FileType.FILE, fileInfo.type);
+ assertEquals(1024, fileInfo.size);
+
+ AzureListCache.ChildInfo folderInfo = cache.get("mycontainer", "path/to/",
"path/to/sub/");
+ assertNotNull(folderInfo);
+ assertEquals(FileType.FOLDER, folderInfo.type);
+ assertEquals(0, folderInfo.size);
+ }
+
+ @Test
+ void testGetMissingKey() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put(
+ "path/to/file.txt", new AzureListCache.ChildInfo(FileType.FILE, 100,
Instant.now()));
+ cache.put("mycontainer", "path/to/", entries);
+
+ assertNull(cache.get("mycontainer", "path/to/", "path/to/other.txt"));
+ }
+
+ @Test
+ void testGetMissingContainer() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ assertNull(cache.get("nonexistent", "path/", "path/file"));
+ }
+
+ @Test
+ void testExpiry() throws InterruptedException {
+ AzureListCache cache = new AzureListCache(50L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new AzureListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("container", "prefix/", entries);
+
+ assertNotNull(cache.get("container", "prefix/", "key"));
+
+ Thread.sleep(100);
+
+ assertNull(cache.get("container", "prefix/", "key"));
+ }
+
+ @Test
+ void testInvalidate() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new AzureListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("container", "prefix/", entries);
+
+ assertNotNull(cache.get("container", "prefix/", "key"));
+ cache.invalidate("container", "prefix/");
+ assertNull(cache.get("container", "prefix/", "key"));
+ }
+
+ @Test
+ void testInvalidateParentOf() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("dir/file.txt", new AzureListCache.ChildInfo(FileType.FILE,
10, Instant.now()));
+ cache.put("container", "dir/", entries);
+
+ assertNotNull(cache.get("container", "dir/", "dir/file.txt"));
+ cache.invalidateParentOf("container", "dir/file.txt");
+ assertNull(cache.get("container", "dir/", "dir/file.txt"));
+ }
+
+ @Test
+ void testPutEmptyMap() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ cache.put("container", "prefix/", new LinkedHashMap<>());
+ assertNull(cache.get("container", "prefix/", "anything"));
+ }
+
+ @Test
+ void testPutNull() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ cache.put("container", "prefix/", null);
+ assertNull(cache.get("container", "prefix/", "anything"));
+ }
+
+ @Test
+ void testParentPrefix() {
+ assertEquals("path/to/", AzureListCache.parentPrefix("path/to/file"));
+ assertEquals("", AzureListCache.parentPrefix("file"));
+ assertEquals("", AzureListCache.parentPrefix(""));
+ assertEquals("", AzureListCache.parentPrefix(null));
+ assertEquals("a/b/", AzureListCache.parentPrefix("a/b/c"));
+ }
+
+ @Test
+ void testParentPrefixWithLeadingSlash() {
+ assertEquals("", AzureListCache.parentPrefix("/topdir"));
+ assertEquals("path/to/", AzureListCache.parentPrefix("/path/to/file"));
+ }
+
+ @Test
+ void testParentPrefixWithTrailingSlash() {
+ assertEquals("path/to/", AzureListCache.parentPrefix("path/to/dir/"));
+ assertEquals("", AzureListCache.parentPrefix("topdir/"));
+ }
+
+ @Test
+ void testParentPrefixWithLeadingAndTrailingSlash() {
+ assertEquals("path/", AzureListCache.parentPrefix("/path/dir/"));
+ }
+
+ @Test
+ void testCustomTtl() {
+ AzureListCache cache = new AzureListCache(5_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new AzureListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("container", "prefix/", entries);
+ assertNotNull(cache.get("container", "prefix/", "key"));
+ }
+
+ @Test
+ void testDifferentContainersSamePrefix() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries1 = new LinkedHashMap<>();
+ entries1.put("prefix/a", new AzureListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ Map<String, AzureListCache.ChildInfo> entries2 = new LinkedHashMap<>();
+ entries2.put("prefix/b", new AzureListCache.ChildInfo(FileType.FILE, 20,
Instant.now()));
+
+ cache.put("container1", "prefix/", entries1);
+ cache.put("container2", "prefix/", entries2);
+
+ assertNotNull(cache.get("container1", "prefix/", "prefix/a"));
+ assertNull(cache.get("container1", "prefix/", "prefix/b"));
+ assertNotNull(cache.get("container2", "prefix/", "prefix/b"));
+ assertNull(cache.get("container2", "prefix/", "prefix/a"));
+ }
+
+ @Test
+ void testInvalidateNonExistentEntry() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ cache.invalidate("nonexistent", "prefix/");
+ cache.invalidateParentOf("nonexistent", "some/key");
+ }
+
+ @Test
+ void testChildInfoFields() {
+ Instant now = Instant.now();
+ AzureListCache.ChildInfo info = new
AzureListCache.ChildInfo(FileType.FILE, 42, now);
+ assertEquals(FileType.FILE, info.type);
+ assertEquals(42, info.size);
+ assertEquals(now, info.lastModified);
+ }
+
+ @Test
+ void testChildInfoFolder() {
+ AzureListCache.ChildInfo info = new
AzureListCache.ChildInfo(FileType.FOLDER, 0, Instant.EPOCH);
+ assertEquals(FileType.FOLDER, info.type);
+ assertEquals(0, info.size);
+ assertEquals(Instant.EPOCH, info.lastModified);
+ }
+
+ @Test
+ void testPutOverwritesPreviousEntry() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries1 = new LinkedHashMap<>();
+ entries1.put("key", new AzureListCache.ChildInfo(FileType.FILE, 100,
Instant.now()));
+ cache.put("container", "prefix/", entries1);
+
+ assertEquals(100, cache.get("container", "prefix/", "key").size);
+
+ Map<String, AzureListCache.ChildInfo> entries2 = new LinkedHashMap<>();
+ entries2.put("key", new AzureListCache.ChildInfo(FileType.FILE, 200,
Instant.now()));
+ cache.put("container", "prefix/", entries2);
+
+ assertEquals(200, cache.get("container", "prefix/", "key").size);
+ }
+
+ @Test
+ void testInvalidateOnlyAffectsTargetPrefix() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries1 = new LinkedHashMap<>();
+ entries1.put("a/file", new AzureListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ Map<String, AzureListCache.ChildInfo> entries2 = new LinkedHashMap<>();
+ entries2.put("b/file", new AzureListCache.ChildInfo(FileType.FILE, 20,
Instant.now()));
+
+ cache.put("container", "a/", entries1);
+ cache.put("container", "b/", entries2);
+
+ cache.invalidate("container", "a/");
+ assertNull(cache.get("container", "a/", "a/file"));
+ assertNotNull(cache.get("container", "b/", "b/file"));
+ }
+
+ @Test
+ void testMultipleChildrenInSamePrefix() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("dir/file1.txt", new AzureListCache.ChildInfo(FileType.FILE,
100, Instant.now()));
+ entries.put("dir/file2.txt", new AzureListCache.ChildInfo(FileType.FILE,
200, Instant.now()));
+ entries.put("dir/sub/", new AzureListCache.ChildInfo(FileType.FOLDER, 0,
Instant.EPOCH));
+
+ cache.put("container", "dir/", entries);
+
+ assertNotNull(cache.get("container", "dir/", "dir/file1.txt"));
+ assertNotNull(cache.get("container", "dir/", "dir/file2.txt"));
+ assertNotNull(cache.get("container", "dir/", "dir/sub/"));
+ assertEquals(100, cache.get("container", "dir/", "dir/file1.txt").size);
+ assertEquals(200, cache.get("container", "dir/", "dir/file2.txt").size);
+ assertEquals(FileType.FOLDER, cache.get("container", "dir/",
"dir/sub/").type);
+ }
+
+ @Test
+ void testRootPrefixEmpty() {
+ AzureListCache cache = new AzureListCache(60_000L);
+ Map<String, AzureListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("topfile.txt", new AzureListCache.ChildInfo(FileType.FILE, 50,
Instant.now()));
+ cache.put("container", "", entries);
+
+ assertNotNull(cache.get("container", "", "topfile.txt"));
+ }
+}
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/googledrive/GoogleDriveFileObject.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/googledrive/GoogleDriveFileObject.java
index 975249b9c1..99a4aa1bde 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/googledrive/GoogleDriveFileObject.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/googledrive/GoogleDriveFileObject.java
@@ -169,10 +169,8 @@ public class GoogleDriveFileObject extends
AbstractFileObject {
}
ByteArrayContent fileContent =
new ByteArrayContent("application/octet-stream", toByteArray());
- if (count > 0) {
- driveService.files().create(file, fileContent).execute();
- ((GoogleDriveFileSystem)
getFileSystem()).clearFileFromCache(getName());
- }
+ driveService.files().create(file, fileContent).execute();
+ ((GoogleDriveFileSystem)
getFileSystem()).clearFileFromCache(getName());
}
};
}
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
index 5f88b9f09e..660d9c0220 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileObject.java
@@ -20,6 +20,7 @@ package org.apache.hop.vfs.gs;
import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Bucket;
import com.google.cloud.storage.BucketInfo;
@@ -33,7 +34,9 @@ import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileType;
@@ -52,6 +55,10 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
String bucketPath;
String scheme;
+ private FileType cachedType = null;
+ private Long cachedSize = null;
+ private Instant cachedLastModified = null;
+
protected GoogleStorageFileObject(
String scheme, AbstractFileName name, GoogleStorageFileSystem fs) {
super(name, fs);
@@ -86,6 +93,20 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
this.bucket = storage.get(bucketName);
}
if (!bucketPath.isEmpty() && this.blob == null) {
+ String parentPrefix = GoogleStorageListCache.parentPrefix(bucketPath);
+ GoogleStorageListCache.ChildInfo cached =
+ getAbstractFileSystem().getFromListCache(bucketName, parentPrefix,
bucketPath);
+ if (cached == null) {
+ cached =
+ getAbstractFileSystem().getFromListCache(bucketName,
parentPrefix, bucketPath + "/");
+ }
+ if (cached != null) {
+ cachedType = cached.type;
+ cachedSize = cached.size;
+ cachedLastModified = cached.lastModified;
+ return;
+ }
+
this.blob = storage.get(bucketName, bucketPath);
String parent = getParentFolder(bucketPath);
String child = lastPathElement(stripTrailingSlash(bucketPath));
@@ -112,6 +133,9 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
@Override
protected long doGetContentSize() throws Exception {
+ if (cachedSize != null) {
+ return cachedSize;
+ }
return hasObject() ? blob.getSize() : 0L;
}
@@ -121,11 +145,17 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
throw new FileNotFoundException();
}
Storage storage = getAbstractFileSystem().setupStorage();
- return new ReadChannelInputStream(storage.reader(blob.getBlobId()));
+ if (blob != null) {
+ return new ReadChannelInputStream(storage.reader(blob.getBlobId()));
+ }
+ return new ReadChannelInputStream(storage.reader(BlobId.of(bucketName,
bucketPath)));
}
@Override
protected FileType doGetType() throws Exception {
+ if (cachedType != null) {
+ return cachedType;
+ }
if (getName() instanceof GoogleStorageFileName) {
return getName().getType();
}
@@ -154,18 +184,34 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
results.add(b.getName());
}
} else {
- Page<Blob> page;
+ String prefix;
if (!bucketPath.isEmpty()) {
+ prefix = appendTrailingSlash(bucketPath);
+ } else {
+ prefix = "";
+ }
+ Page<Blob> page;
+ if (!prefix.isEmpty()) {
page =
storage.list(
- bucketName,
- BlobListOption.currentDirectory(),
- BlobListOption.prefix(appendTrailingSlash(bucketPath)));
+ bucketName, BlobListOption.currentDirectory(),
BlobListOption.prefix(prefix));
} else {
page = storage.list(bucketName, BlobListOption.currentDirectory());
}
+ Map<String, GoogleStorageListCache.ChildInfo> cacheEntries = new
LinkedHashMap<>();
for (Blob b : page.iterateAll()) {
results.add(lastPathElement(stripTrailingSlash(b.getName())));
+ FileType childType = b.isDirectory() ? FileType.FOLDER : FileType.FILE;
+ long childSize = b.getSize() != null ? b.getSize() : 0L;
+ Instant childLastMod =
+ b.getUpdateTimeOffsetDateTime() != null
+ ? b.getUpdateTimeOffsetDateTime().toInstant()
+ : Instant.EPOCH;
+ cacheEntries.put(
+ b.getName(), new GoogleStorageListCache.ChildInfo(childType,
childSize, childLastMod));
+ }
+ if (!cacheEntries.isEmpty()) {
+ getAbstractFileSystem().putListCache(bucketName, prefix, cacheEntries);
}
}
return results.toArray(new String[0]);
@@ -179,6 +225,7 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
} else {
this.blob =
storage.create(BlobInfo.newBuilder(bucket,
appendTrailingSlash(bucketPath)).build());
+ getAbstractFileSystem().invalidateListCacheForParentOf(bucketName,
bucketPath);
}
}
@@ -190,16 +237,23 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
if (!blob.delete()) {
throw new IOException("Failed to delete object '" + this + "' (not
found)");
}
+ getAbstractFileSystem().invalidateListCacheForParentOf(bucketName,
bucketPath);
}
@Override
protected void doDetach() throws Exception {
bucket = null;
blob = null;
+ cachedType = null;
+ cachedSize = null;
+ cachedLastModified = null;
}
@Override
protected long doGetLastModifiedTime() throws Exception {
+ if (cachedLastModified != null &&
!Instant.EPOCH.equals(cachedLastModified)) {
+ return cachedLastModified.toEpochMilli();
+ }
if (hasObject()) {
GoogleCloudConfig config = GoogleCloudConfigSingleton.getConfig();
if (isFolder()) {
@@ -237,6 +291,7 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
this.blob =
storage.create(BlobInfo.newBuilder(bucket,
stripTrailingSlash(bucketPath)).build());
}
+ getAbstractFileSystem().invalidateListCacheForParentOf(bucketName,
bucketPath);
return new WriteChannelOutputStream(storage.writer(blob));
}
@@ -257,31 +312,47 @@ public class GoogleStorageFileObject extends
AbstractFileObject<GoogleStorageFil
getAbstractFileSystem()));
}
} else {
- Page<Blob> page;
+ String prefix;
if (!bucketPath.isEmpty()) {
+ prefix = appendTrailingSlash(bucketPath);
+ } else {
+ prefix = "";
+ }
+ Page<Blob> page;
+ if (!prefix.isEmpty()) {
page =
storage.list(
- bucketName,
- BlobListOption.currentDirectory(),
- BlobListOption.prefix(appendTrailingSlash(bucketPath)));
+ bucketName, BlobListOption.currentDirectory(),
BlobListOption.prefix(prefix));
} else {
page = storage.list(bucketName, BlobListOption.currentDirectory());
}
+ Map<String, GoogleStorageListCache.ChildInfo> cacheEntries = new
LinkedHashMap<>();
for (Blob b : page.iterateAll()) {
if (this.blob != null && b.getName().equals(this.blob.getName())) {
continue;
}
+ FileType childType = b.isDirectory() ? FileType.FOLDER : FileType.FILE;
+ long childSize = b.getSize() != null ? b.getSize() : 0L;
+ Instant childLastMod =
+ b.getUpdateTimeOffsetDateTime() != null
+ ? b.getUpdateTimeOffsetDateTime().toInstant()
+ : Instant.EPOCH;
+ cacheEntries.put(
+ b.getName(), new GoogleStorageListCache.ChildInfo(childType,
childSize, childLastMod));
results.add(
new GoogleStorageFileObject(
scheme,
new GoogleStorageFileName(
scheme,
getName().getPath() + "/" +
lastPathElement(stripTrailingSlash(b.getName())),
- b.isDirectory() ? FileType.FOLDER : FileType.FILE),
+ childType),
getAbstractFileSystem(),
this.bucket != null ? bucket : storage.get(bucketName),
b));
}
+ if (!cacheEntries.isEmpty()) {
+ getAbstractFileSystem().putListCache(bucketName, prefix, cacheEntries);
+ }
}
return results.toArray(new FileObject[0]);
}
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileSystem.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileSystem.java
index 74a713b781..2dd9c028af 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileSystem.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageFileSystem.java
@@ -38,6 +38,8 @@ public class GoogleStorageFileSystem extends
AbstractFileSystem {
Storage storage = null;
FileSystemOptions fileSystemOptions;
+ private GoogleStorageListCache listCache;
+
protected GoogleStorageFileSystem(
FileName rootName, FileObject parentLayer, FileSystemOptions
fileSystemOptions)
throws FileSystemException {
@@ -45,6 +47,15 @@ public class GoogleStorageFileSystem extends
AbstractFileSystem {
this.fileSystemOptions = fileSystemOptions;
}
+ private GoogleStorageListCache getListCache() {
+ if (listCache == null) {
+ GoogleCloudConfig config = GoogleCloudConfigSingleton.getConfig();
+ long ttlMs =
org.apache.hop.core.Const.toLong(config.getCacheTtlSeconds(), 10L) * 1000L;
+ listCache = new GoogleStorageListCache(ttlMs);
+ }
+ return listCache;
+ }
+
@Override
protected FileObject createFile(AbstractFileName name) throws Exception {
return new GoogleStorageFileObject(
@@ -99,6 +110,26 @@ public class GoogleStorageFileSystem extends
AbstractFileSystem {
}
}
+ void putListCache(
+ String bucket,
+ String prefix,
+ java.util.Map<String, GoogleStorageListCache.ChildInfo> entries) {
+ getListCache().put(bucket, prefix, entries);
+ }
+
+ GoogleStorageListCache.ChildInfo getFromListCache(
+ String bucket, String parentPrefix, String childFullKey) {
+ return getListCache().get(bucket, parentPrefix, childFullKey);
+ }
+
+ void invalidateListCache(String bucket, String prefix) {
+ getListCache().invalidate(bucket, prefix);
+ }
+
+ void invalidateListCacheForParentOf(String bucket, String key) {
+ getListCache().invalidateParentOf(bucket, key);
+ }
+
String getBucketPath(FileName name) {
int idx = name.getPath().indexOf('/', 1);
if (idx > -1) {
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageListCache.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageListCache.java
new file mode 100644
index 0000000000..ebc0cf22c5
--- /dev/null
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/GoogleStorageListCache.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hop.vfs.gs;
+
+import java.time.Instant;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.vfs2.FileType;
+
+/**
+ * Cache for GCS list results. When a directory is listed, we store the child
keys together with
+ * their types, sizes and last-modified timestamps, so that later doAttach()
calls for those
+ * children can be answered without extra API calls.
+ */
+public class GoogleStorageListCache {
+
+ public static final long DEFAULT_TTL_MS = 10_000L;
+
+ public static final class ChildInfo {
+ public final FileType type;
+ public final long size;
+ public final Instant lastModified;
+
+ public ChildInfo(FileType type, long size, Instant lastModified) {
+ this.type = type;
+ this.size = size;
+ this.lastModified = lastModified;
+ }
+ }
+
+ private final long ttlMs;
+ private final ConcurrentHashMap<String, CachedList> cache = new
ConcurrentHashMap<>();
+
+ public GoogleStorageListCache() {
+ this(DEFAULT_TTL_MS);
+ }
+
+ public GoogleStorageListCache(long ttlMs) {
+ this.ttlMs = ttlMs;
+ }
+
+ private static String cacheKey(String bucket, String prefix) {
+ return bucket + "|" + prefix;
+ }
+
+ private static class CachedList {
+ final Map<String, ChildInfo> entries;
+ final long expiryMillis;
+
+ CachedList(Map<String, ChildInfo> entries, long expiryMillis) {
+ this.entries = new ConcurrentHashMap<>(entries);
+ this.expiryMillis = expiryMillis;
+ }
+ }
+
+  /** Store the listing result for the given bucket and prefix; it expires after the configured TTL. */
+ public void put(String bucket, String prefix, Map<String, ChildInfo>
childEntries) {
+ if (childEntries == null || childEntries.isEmpty()) {
+ return;
+ }
+ String key = cacheKey(bucket, prefix);
+ cache.put(key, new CachedList(childEntries, System.currentTimeMillis() +
ttlMs));
+ }
+
+ /**
+ * Look up cached type/size/lastModified for a child. Returns null if not in
cache or cache
+ * expired.
+ */
+ public ChildInfo get(String bucket, String parentPrefix, String
childFullKey) {
+ String key = cacheKey(bucket, parentPrefix);
+ CachedList cached = cache.get(key);
+ if (cached == null) {
+ return null;
+ }
+ if (System.currentTimeMillis() > cached.expiryMillis) {
+ cache.remove(key);
+ return null;
+ }
+ return cached.entries.get(childFullKey);
+ }
+
+ /** Invalidate the list cache for the given bucket and prefix. */
+ public void invalidate(String bucket, String prefix) {
+ cache.remove(cacheKey(bucket, prefix));
+ }
+
+ /**
+ * Invalidate the list cache for the parent directory of the given key. Call
after put/delete so
+ * the next list reflects the change.
+ */
+ public void invalidateParentOf(String bucket, String key) {
+ String parentPrefix = parentPrefix(key);
+ invalidate(bucket, parentPrefix);
+ }
+
+  /** Compute the parent prefix: "path/to/" for key "path/to/file"; "" for a
top-level key, an empty key, or null. */
+ public static String parentPrefix(String key) {
+ if (key == null || key.isEmpty()) {
+ return "";
+ }
+ String stripped = key.endsWith("/") ? key.substring(0, key.length() - 1) :
key;
+ int last = stripped.lastIndexOf('/');
+ return last >= 0 ? stripped.substring(0, last + 1) : "";
+ }
+}
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
index c44e5cbe5c..56b4b579da 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfig.java
@@ -39,6 +39,9 @@ public class GoogleCloudConfig {
private String connectionTimeout;
private String readTimeout;
+ /** Cache TTL in seconds for list-result caching (same as S3/MinIO/Azure). */
+ private String cacheTtlSeconds;
+
public GoogleCloudConfig() {
scanFoldersForLastModifDate = false;
maxAttempts = "6";
@@ -51,6 +54,7 @@ public class GoogleCloudConfig {
maxRpcTimeout = "50";
connectionTimeout = "20";
readTimeout = "20";
+ cacheTtlSeconds = "5";
}
public GoogleCloudConfig(GoogleCloudConfig config) {
@@ -67,5 +71,6 @@ public class GoogleCloudConfig {
maxRpcTimeout = config.maxRpcTimeout;
connectionTimeout = config.connectionTimeout;
readTimeout = config.readTimeout;
+ cacheTtlSeconds = config.cacheTtlSeconds;
}
}
diff --git
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
index f54a952a29..2d3c449866 100644
---
a/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
+++
b/plugins/tech/google/src/main/java/org/apache/hop/vfs/gs/config/GoogleCloudConfigPlugin.java
@@ -74,6 +74,8 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
"10900-google-cloud-service-connect-timeout";
private static final String WIDGET_ID_GOOGLE_CLOUD_SERVICE_READ_TIMEOUT =
"1100-google-cloud-service-read-timeout";
+ private static final String WIDGET_ID_GOOGLE_CLOUD_CACHE_TTL_SECONDS =
+ "11100-google-cloud-cache-ttl-seconds";
@GuiWidgetElement(
id = WIDGET_ID_GOOGLE_CLOUD_SERVICE_ACCOUNT_KEY_FILE,
@@ -186,6 +188,15 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
toolTip = "i18n::GoogleCloudPlugin.ReadTimeout.Description")
private String readTimeout;
+ @GuiWidgetElement(
+ id = WIDGET_ID_GOOGLE_CLOUD_CACHE_TTL_SECONDS,
+ parentId = ConfigPluginOptionsTab.GUI_WIDGETS_PARENT_ID,
+ type = GuiElementType.TEXT,
+ variables = true,
+ label = "i18n::GoogleCloudPlugin.CacheTtlSeconds.Label",
+ toolTip = "i18n::GoogleCloudPlugin.CacheTtlSeconds.Description")
+ private String cacheTtlSeconds;
+
/**
* Gets instance
*
@@ -207,6 +218,7 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
instance.maxRpcTimeout = config.getMaxRpcTimeout();
instance.connectTimeout = config.getConnectionTimeout();
instance.readTimeout = config.getReadTimeout();
+ instance.cacheTtlSeconds = config.getCacheTtlSeconds();
return instance;
}
@@ -297,6 +309,12 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
changed = true;
}
+ if (cacheTtlSeconds != null) {
+ config.setCacheTtlSeconds(cacheTtlSeconds);
+ log.logBasic("Google Cloud list cache TTL (seconds) set to " +
cacheTtlSeconds);
+ changed = true;
+ }
+
// Save to file if anything changed
//
if (changed) {
@@ -378,6 +396,10 @@ public class GoogleCloudConfigPlugin implements
IConfigOptions, IGuiPluginCompos
readTimeout = ((TextVar) control).getText();
GoogleCloudConfigSingleton.getConfig().setReadTimeout(readTimeout);
break;
+ case WIDGET_ID_GOOGLE_CLOUD_CACHE_TTL_SECONDS:
+ cacheTtlSeconds = ((TextVar) control).getText();
+
GoogleCloudConfigSingleton.getConfig().setCacheTtlSeconds(cacheTtlSeconds);
+ break;
default:
break;
}
diff --git
a/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
b/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
index d0bec7be27..23c0b2d185 100644
---
a/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
+++
b/plugins/tech/google/src/main/resources/org/apache/hop/vfs/gs/config/messages/messages_en_US.properties
@@ -40,5 +40,7 @@ GoogleCloudPlugin.ConnectTimeout.Label=Connect Timeout
GoogleCloudPlugin.ConnectTimeout.Description=Connect Timeout
GoogleCloudPlugin.ReadTimeout.Label=Read Timeout
GoogleCloudPlugin.ReadTimeout.Description=Read Timeout
+GoogleCloudPlugin.CacheTtlSeconds.Label=Cache TTL (seconds)
GoogleCloudPlugin.CacheTtlSeconds.Description=How long to cache folder listing
results, in seconds (default 5). If unset or invalid, it falls back to 10 seconds.
GoogleCloudPlugin.ScanFolderForLastModificationDate.Label=Scan folders to find
last modified data
GoogleCloudPlugin.ScanFolderForLastModificationDate.Description=Scans all
files in a folder and uses the most recent modification data as the folder
modification data. This may have a performance impact in buckets with lots of
files.
\ No newline at end of file
diff --git
a/plugins/tech/google/src/test/java/org/apache/hop/vfs/gs/GoogleStorageListCacheTest.java
b/plugins/tech/google/src/test/java/org/apache/hop/vfs/gs/GoogleStorageListCacheTest.java
new file mode 100644
index 0000000000..3546a23414
--- /dev/null
+++
b/plugins/tech/google/src/test/java/org/apache/hop/vfs/gs/GoogleStorageListCacheTest.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.vfs.gs;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import java.time.Instant;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.commons.vfs2.FileType;
+import org.junit.jupiter.api.Test;
+
+class GoogleStorageListCacheTest {
+
+ @Test
+ void testDefaultTtl() {
+ assertEquals(10_000L, GoogleStorageListCache.DEFAULT_TTL_MS);
+ }
+
+ @Test
+ void testDefaultConstructorUsesTtl() {
+ GoogleStorageListCache cache = new GoogleStorageListCache();
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put("key", new GoogleStorageListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testPutAndGet() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put(
+ "path/to/file.txt",
+ new GoogleStorageListCache.ChildInfo(FileType.FILE, 1024,
Instant.now()));
+ entries.put(
+ "path/to/sub/", new GoogleStorageListCache.ChildInfo(FileType.FOLDER,
0, Instant.EPOCH));
+
+ cache.put("mybucket", "path/to/", entries);
+
+ GoogleStorageListCache.ChildInfo fileInfo =
+ cache.get("mybucket", "path/to/", "path/to/file.txt");
+ assertNotNull(fileInfo);
+ assertEquals(FileType.FILE, fileInfo.type);
+ assertEquals(1024, fileInfo.size);
+
+ GoogleStorageListCache.ChildInfo folderInfo = cache.get("mybucket",
"path/to/", "path/to/sub/");
+ assertNotNull(folderInfo);
+ assertEquals(FileType.FOLDER, folderInfo.type);
+ assertEquals(0, folderInfo.size);
+ }
+
+ @Test
+ void testGetMissingKey() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put(
+ "path/to/file.txt",
+ new GoogleStorageListCache.ChildInfo(FileType.FILE, 100,
Instant.now()));
+ cache.put("mybucket", "path/to/", entries);
+
+ assertNull(cache.get("mybucket", "path/to/", "path/to/other.txt"));
+ }
+
+ @Test
+ void testGetMissingBucket() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ assertNull(cache.get("nonexistent", "path/", "path/file"));
+ }
+
+ @Test
+ void testExpiry() throws InterruptedException {
+ GoogleStorageListCache cache = new GoogleStorageListCache(50L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put("key", new GoogleStorageListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+
+ Thread.sleep(100);
+
+ assertNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testInvalidate() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put("key", new GoogleStorageListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+ cache.invalidate("bucket", "prefix/");
+ assertNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testInvalidateParentOf() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put(
+ "dir/file.txt", new GoogleStorageListCache.ChildInfo(FileType.FILE,
10, Instant.now()));
+ cache.put("bucket", "dir/", entries);
+
+ assertNotNull(cache.get("bucket", "dir/", "dir/file.txt"));
+ cache.invalidateParentOf("bucket", "dir/file.txt");
+ assertNull(cache.get("bucket", "dir/", "dir/file.txt"));
+ }
+
+ @Test
+ void testPutEmptyMap() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ cache.put("bucket", "prefix/", new LinkedHashMap<>());
+ assertNull(cache.get("bucket", "prefix/", "anything"));
+ }
+
+ @Test
+ void testPutNull() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ cache.put("bucket", "prefix/", null);
+ assertNull(cache.get("bucket", "prefix/", "anything"));
+ }
+
+ @Test
+ void testParentPrefix() {
+ assertEquals("path/to/",
GoogleStorageListCache.parentPrefix("path/to/file"));
+ assertEquals("", GoogleStorageListCache.parentPrefix("file"));
+ assertEquals("", GoogleStorageListCache.parentPrefix(""));
+ assertEquals("", GoogleStorageListCache.parentPrefix(null));
+ assertEquals("a/b/", GoogleStorageListCache.parentPrefix("a/b/c"));
+ }
+
+ @Test
+ void testParentPrefixWithTrailingSlash() {
+ assertEquals("path/to/",
GoogleStorageListCache.parentPrefix("path/to/dir/"));
+ assertEquals("", GoogleStorageListCache.parentPrefix("topdir/"));
+ }
+
+ @Test
+ void testCustomTtl() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(5_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put("key", new GoogleStorageListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testDifferentBucketsSamePrefix() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries1 = new
LinkedHashMap<>();
+ entries1.put(
+ "prefix/a", new GoogleStorageListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ Map<String, GoogleStorageListCache.ChildInfo> entries2 = new
LinkedHashMap<>();
+ entries2.put(
+ "prefix/b", new GoogleStorageListCache.ChildInfo(FileType.FILE, 20,
Instant.now()));
+
+ cache.put("bucket1", "prefix/", entries1);
+ cache.put("bucket2", "prefix/", entries2);
+
+ assertNotNull(cache.get("bucket1", "prefix/", "prefix/a"));
+ assertNull(cache.get("bucket1", "prefix/", "prefix/b"));
+ assertNotNull(cache.get("bucket2", "prefix/", "prefix/b"));
+ assertNull(cache.get("bucket2", "prefix/", "prefix/a"));
+ }
+
+ @Test
+ void testInvalidateNonExistentEntry() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ cache.invalidate("nonexistent", "prefix/");
+ cache.invalidateParentOf("nonexistent", "some/key");
+ }
+
+ @Test
+ void testChildInfoFields() {
+ Instant now = Instant.now();
+ GoogleStorageListCache.ChildInfo info =
+ new GoogleStorageListCache.ChildInfo(FileType.FILE, 42, now);
+ assertEquals(FileType.FILE, info.type);
+ assertEquals(42, info.size);
+ assertEquals(now, info.lastModified);
+ }
+
+ @Test
+ void testChildInfoFolder() {
+ GoogleStorageListCache.ChildInfo info =
+ new GoogleStorageListCache.ChildInfo(FileType.FOLDER, 0,
Instant.EPOCH);
+ assertEquals(FileType.FOLDER, info.type);
+ assertEquals(0, info.size);
+ assertEquals(Instant.EPOCH, info.lastModified);
+ }
+
+ @Test
+ void testPutOverwritesPreviousEntry() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries1 = new
LinkedHashMap<>();
+ entries1.put("key", new GoogleStorageListCache.ChildInfo(FileType.FILE,
100, Instant.now()));
+ cache.put("bucket", "prefix/", entries1);
+
+ assertEquals(100, cache.get("bucket", "prefix/", "key").size);
+
+ Map<String, GoogleStorageListCache.ChildInfo> entries2 = new
LinkedHashMap<>();
+ entries2.put("key", new GoogleStorageListCache.ChildInfo(FileType.FILE,
200, Instant.now()));
+ cache.put("bucket", "prefix/", entries2);
+
+ assertEquals(200, cache.get("bucket", "prefix/", "key").size);
+ }
+
+ @Test
+ void testInvalidateOnlyAffectsTargetPrefix() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries1 = new
LinkedHashMap<>();
+ entries1.put("a/file", new GoogleStorageListCache.ChildInfo(FileType.FILE,
10, Instant.now()));
+ Map<String, GoogleStorageListCache.ChildInfo> entries2 = new
LinkedHashMap<>();
+ entries2.put("b/file", new GoogleStorageListCache.ChildInfo(FileType.FILE,
20, Instant.now()));
+
+ cache.put("bucket", "a/", entries1);
+ cache.put("bucket", "b/", entries2);
+
+ cache.invalidate("bucket", "a/");
+ assertNull(cache.get("bucket", "a/", "a/file"));
+ assertNotNull(cache.get("bucket", "b/", "b/file"));
+ }
+
+ @Test
+ void testMultipleChildrenInSamePrefix() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put(
+ "dir/file1.txt", new GoogleStorageListCache.ChildInfo(FileType.FILE,
100, Instant.now()));
+ entries.put(
+ "dir/file2.txt", new GoogleStorageListCache.ChildInfo(FileType.FILE,
200, Instant.now()));
+ entries.put(
+ "dir/sub/", new GoogleStorageListCache.ChildInfo(FileType.FOLDER, 0,
Instant.EPOCH));
+
+ cache.put("bucket", "dir/", entries);
+
+ assertNotNull(cache.get("bucket", "dir/", "dir/file1.txt"));
+ assertNotNull(cache.get("bucket", "dir/", "dir/file2.txt"));
+ assertNotNull(cache.get("bucket", "dir/", "dir/sub/"));
+ assertEquals(100, cache.get("bucket", "dir/", "dir/file1.txt").size);
+ assertEquals(200, cache.get("bucket", "dir/", "dir/file2.txt").size);
+ assertEquals(FileType.FOLDER, cache.get("bucket", "dir/",
"dir/sub/").type);
+ }
+
+ @Test
+ void testRootPrefixEmpty() {
+ GoogleStorageListCache cache = new GoogleStorageListCache(60_000L);
+ Map<String, GoogleStorageListCache.ChildInfo> entries = new
LinkedHashMap<>();
+ entries.put(
+ "topfile.txt", new GoogleStorageListCache.ChildInfo(FileType.FILE, 50,
Instant.now()));
+ cache.put("bucket", "", entries);
+
+ assertNotNull(cache.get("bucket", "", "topfile.txt"));
+ }
+}
diff --git
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileObject.java
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileObject.java
index a5aeb4dadb..eb88d0a9bf 100644
---
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileObject.java
+++
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileObject.java
@@ -38,9 +38,12 @@ import io.minio.messages.Item;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.OutputStream;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
@@ -63,6 +66,10 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
protected MinioPipedOutputStream outputStream;
protected GetObjectResponse responseInputStream;
+ private FileType cachedType = null;
+ private Long cachedSize = null;
+ private Instant cachedLastModified = null;
+
protected MinioFileObject(final AbstractFileName name, final MinioFileSystem
fileSystem) {
super(name, fileSystem);
this.fileSystem = fileSystem;
@@ -91,6 +98,9 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
@Override
protected long doGetContentSize() {
+ if (cachedSize != null) {
+ return cachedSize;
+ }
if (statObjectResponse != null) {
return statObjectResponse.size();
} else {
@@ -116,7 +126,6 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
protected String[] doListChildren() throws Exception {
List<String> childrenList = new ArrayList<>();
- // Do we need to list the buckets as "folders" of the root?
if (isRootBucket()) {
List<Bucket> buckets = fileSystem.getClient().listBuckets();
for (Bucket bucket : buckets) {
@@ -127,17 +136,31 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
if (!path.endsWith("/")) {
path += DELIMITER;
}
- // The regular case
+ String prefix = path.equals(DELIMITER) ? "" : path;
+
ListObjectsArgs.Builder listObjectsArgsBuilder =
ListObjectsArgs.builder().bucket(bucketName);
- if (!path.equals(DELIMITER)) listObjectsArgsBuilder.prefix(path);
+ if (!prefix.isEmpty()) {
+ listObjectsArgsBuilder.prefix(prefix);
+ }
ListObjectsArgs args = listObjectsArgsBuilder.build();
Iterable<Result<Item>> results =
fileSystem.getClient().listObjects(args);
+ Map<String, MinioListCache.ChildInfo> cacheEntries = new
LinkedHashMap<>();
for (Result<Item> result : results) {
Item item = result.get();
if (item != null) {
String objectName = item.objectName();
+ FileType childType = item.isDir() ? FileType.FOLDER : FileType.FILE;
+ long childSize = item.size();
+ Instant childLastMod =
+ item.lastModified() != null ? item.lastModified().toInstant() :
Instant.EPOCH;
+
+ cacheEntries.put(
+ objectName, new MinioListCache.ChildInfo(childType, childSize,
childLastMod));
if (item.isDir() && !objectName.endsWith(DELIMITER)) {
+ cacheEntries.put(
+ objectName + DELIMITER,
+ new MinioListCache.ChildInfo(childType, childSize,
childLastMod));
objectName += DELIMITER;
}
@@ -149,6 +172,9 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
}
}
}
+ if (!cacheEntries.isEmpty()) {
+ fileSystem.putListCache(bucketName, prefix, cacheEntries);
+ }
}
return childrenList.toArray(new String[0]);
}
@@ -163,8 +189,6 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
@Override
public void doAttach() throws FileSystemException {
- // This allows us to sprinkle doAttach() where needed without incurring a
performance hit.
- //
if (attached) {
return;
}
@@ -173,32 +197,34 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
LogChannel.GENERAL.logDebug("Attach called on {0}", getQualifiedName());
injectType(FileType.IMAGINARY);
- // The root bucket is minio:/// and is a folder
- // If the key is empty and the bucket is known, it's also a folder
- //
if (StringUtils.isEmpty(key)) {
- // cannot attach to root bucket
injectType(FileType.FOLDER);
return;
}
+ String parentPrefix = MinioListCache.parentPrefix(key);
+ MinioListCache.ChildInfo cached = fileSystem.getFromListCache(bucketName,
parentPrefix, key);
+ if (cached == null && !key.endsWith(DELIMITER)) {
+ cached = fileSystem.getFromListCache(bucketName, parentPrefix, key +
DELIMITER);
+ }
+ if (cached != null) {
+ cachedType = cached.type;
+ cachedSize = cached.size;
+ cachedLastModified = cached.lastModified;
+ injectType(cached.type);
+ return;
+ }
+
try {
- // We'll first try the file scenario:
- //
StatObjectArgs statArgs =
StatObjectArgs.builder().bucket(bucketName).object(key).build();
statObjectResponse = fileSystem.getClient().statObject(statArgs);
- // In MinIO keys with a trailing slash (delimiter), are considered
folders.
- //
if (key.endsWith(DELIMITER)) {
injectType(FileType.FOLDER);
} else {
injectType(getName().getType());
}
} catch (Exception e) {
- // File does not exist.
- // Perhaps it's a folder and we can find it by looking for the key with
an extra slash?
- //
if (key.endsWith(DELIMITER)) {
statObjectResponse = null;
} else {
@@ -208,7 +234,6 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
statObjectResponse = fileSystem.getClient().statObject(statArgs);
injectType(FileType.FOLDER);
} catch (Exception ex) {
- // Still doesn't exist?
statObjectResponse = null;
}
}
@@ -232,6 +257,9 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
outputStream = null;
responseInputStream = null;
statObjectResponse = null;
+ cachedType = null;
+ cachedSize = null;
+ cachedLastModified = null;
attached = false;
}
}
@@ -274,6 +302,7 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
RemoveObjectArgs removeObjectArgs =
RemoveObjectArgs.builder().bucket(bucketName).object(key).build();
fileSystem.getClient().removeObject(removeObjectArgs);
+ fileSystem.invalidateListCacheForParentOf(bucketName, key);
doDetach();
} catch (Exception e) {
throw new FileSystemException("Error deleting object " + key + " in
bucket " + bucketName, e);
@@ -317,6 +346,7 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
.stream(new ByteArrayInputStream(new byte[] {}), 0, -1)
.build();
client.putObject(args);
+ fileSystem.invalidateListCacheForParentOf(bucketName, key);
} catch (Exception e) {
throw new FileSystemException("Error creating folder", e);
} finally {
@@ -353,6 +383,9 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
@Override
public long doGetLastModifiedTime() {
+ if (cachedLastModified != null &&
!Instant.EPOCH.equals(cachedLastModified)) {
+ return cachedLastModified.toEpochMilli();
+ }
if (statObjectResponse == null) {
return 0;
}
@@ -370,6 +403,7 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
.contentType("binary/octet-stream")
.build();
fileSystem.getClient().putObject(args);
+ fileSystem.invalidateListCacheForParentOf(bucketName, key);
} catch (Exception e) {
throw new
FileSystemException("vfs.provider.local/create-folder.error", this, e);
}
@@ -404,11 +438,11 @@ public class MinioFileObject extends
AbstractFileObject<MinioFileSystem> {
.build();
fileSystem.getClient().copyObject(args);
- // Delete self
+ fileSystem.invalidateListCacheForParentOf(bucketName, key);
+ fileSystem.invalidateListCacheForParentOf(dest.bucketName, dest.key);
+
delete();
- // Invalidate metadata: the old file no longer exists
- //
closeMinio();
}
diff --git
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileProvider.java
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileProvider.java
index 32d9917d7d..be05eb0e2e 100644
---
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileProvider.java
+++
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileProvider.java
@@ -129,6 +129,11 @@ public class MinioFileProvider extends
AbstractOriginatingFileProvider {
String partSize = variables.resolve(minioMeta.getPartSize());
fileSystem.setPartSize(Const.toLong(partSize, DEFAULT_PART_SIZE));
+ // List cache TTL in seconds (same as S3)
+ String cacheTtlSeconds =
variables.resolve(minioMeta.getCacheTtlSeconds());
+ long ttlMs = Const.toLong(cacheTtlSeconds, 10L) * 1000L;
+ fileSystem.setListCacheTtlMs(ttlMs);
+
return fileSystem;
} finally {
UserAuthenticatorUtils.cleanup(authData);
diff --git
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileSystem.java
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileSystem.java
index e38135e762..26d33b12bd 100644
---
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileSystem.java
+++
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioFileSystem.java
@@ -51,11 +51,20 @@ public class MinioFileSystem extends AbstractFileSystem {
protected long partSize;
private MinioClient client;
+ private long listCacheTtlMs = MinioListCache.DEFAULT_TTL_MS;
+ private MinioListCache listCache;
protected MinioFileSystem(final FileName rootName, final FileSystemOptions
fileSystemOptions) {
super(rootName, null, fileSystemOptions);
}
+ private MinioListCache getListCache() {
+ if (listCache == null) {
+ listCache = new MinioListCache(listCacheTtlMs);
+ }
+ return listCache;
+ }
+
@Override
protected void addCapabilities(Collection<Capability> caps) {
caps.addAll(CAPABILITIES);
@@ -126,4 +135,26 @@ public class MinioFileSystem extends AbstractFileSystem {
public long convertToLong(String partSize) {
return storageUnitConverter.displaySizeToByteCount(partSize);
}
+
+ public void setListCacheTtlMs(long listCacheTtlMs) {
+ this.listCacheTtlMs = listCacheTtlMs;
+ }
+
+ void putListCache(
+ String bucket, String prefix, java.util.Map<String,
MinioListCache.ChildInfo> entries) {
+ getListCache().put(bucket, prefix, entries);
+ }
+
+ MinioListCache.ChildInfo getFromListCache(
+ String bucket, String parentPrefix, String childFullKey) {
+ return getListCache().get(bucket, parentPrefix, childFullKey);
+ }
+
+ void invalidateListCache(String bucket, String prefix) {
+ getListCache().invalidate(bucket, prefix);
+ }
+
+ void invalidateListCacheForParentOf(String bucket, String key) {
+ getListCache().invalidateParentOf(bucket, key);
+ }
}
diff --git
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioListCache.java
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioListCache.java
new file mode 100644
index 0000000000..119c1ed052
--- /dev/null
+++
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/MinioListCache.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hop.vfs.minio;
+
+import java.time.Instant;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.commons.vfs2.FileType;
+
+/**
+ * Cache for MinIO list results. When a directory is listed via listObjects,
we store the child keys
+ * and their types/sizes so that later doAttach() calls for those children can
skip statObject
+ * calls.
+ */
+public class MinioListCache {
+
+ public static final long DEFAULT_TTL_MS = 10_000L;
+
+ public static final class ChildInfo {
+ public final FileType type;
+ public final long size;
+ public final Instant lastModified;
+
+ public ChildInfo(FileType type, long size, Instant lastModified) {
+ this.type = type;
+ this.size = size;
+ this.lastModified = lastModified;
+ }
+ }
+
+ private final long ttlMs;
+ private final ConcurrentHashMap<String, CachedList> cache = new
ConcurrentHashMap<>();
+
+ public MinioListCache() {
+ this(DEFAULT_TTL_MS);
+ }
+
+ public MinioListCache(long ttlMs) {
+ this.ttlMs = ttlMs;
+ }
+
+ private static String cacheKey(String bucket, String prefix) {
+ return bucket + "|" + prefix;
+ }
+
+ private static class CachedList {
+ final Map<String, ChildInfo> entries;
+ final long expiryMillis;
+
+ CachedList(Map<String, ChildInfo> entries, long expiryMillis) {
+ this.entries = new ConcurrentHashMap<>(entries);
+ this.expiryMillis = expiryMillis;
+ }
+ }
+
+ /** Store list result for the given bucket and prefix. */
+ public void put(String bucket, String prefix, Map<String, ChildInfo>
childEntries) {
+ if (childEntries == null || childEntries.isEmpty()) {
+ return;
+ }
+ String key = cacheKey(bucket, prefix);
+ cache.put(key, new CachedList(childEntries, System.currentTimeMillis() +
ttlMs));
+ }
+
+ /**
+ * Look up cached type/size/lastModified for a child. Returns null if not in
cache or cache
+ * expired.
+ */
+ public ChildInfo get(String bucket, String parentPrefix, String
childFullKey) {
+ String key = cacheKey(bucket, parentPrefix);
+ CachedList cached = cache.get(key);
+ if (cached == null) {
+ return null;
+ }
+ if (System.currentTimeMillis() > cached.expiryMillis) {
+ cache.remove(key);
+ return null;
+ }
+ return cached.entries.get(childFullKey);
+ }
+
+ /** Invalidate the list cache for the given bucket and prefix. */
+ public void invalidate(String bucket, String prefix) {
+ cache.remove(cacheKey(bucket, prefix));
+ }
+
+ /**
+ * Invalidate the list cache for the parent directory of the given key. Call
after put/delete so
+ * the next list reflects the change.
+ */
+ public void invalidateParentOf(String bucket, String key) {
+ String parentPrefix = parentPrefix(key);
+ invalidate(bucket, parentPrefix);
+ }
+
+ /** Compute parent prefix: "path/to/" for key "path/to/file", "" for "file"
or "". */
+ public static String parentPrefix(String key) {
+ if (key == null || key.isEmpty()) {
+ return "";
+ }
+ String stripped = key.endsWith("/") ? key.substring(0, key.length() - 1) :
key;
+ int last = stripped.lastIndexOf('/');
+ return last >= 0 ? stripped.substring(0, last + 1) : "";
+ }
+}
diff --git
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/metadata/MinioMeta.java
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/metadata/MinioMeta.java
index 8d93cdc5ea..66f819ed41 100644
---
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/metadata/MinioMeta.java
+++
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/metadata/MinioMeta.java
@@ -47,6 +47,7 @@ public class MinioMeta extends HopMetadataBase implements
Serializable, IHopMeta
private static final String WIDGET_ID_MINIO_ENDPOINT_SECURE =
"10220-minio-endpoint-secure";
private static final String WIDGET_ID_MINIO_REGION = "10300-minio-region";
private static final String WIDGET_ID_MINIO_PART_SIZE =
"10400-minio-part-size";
+ private static final String WIDGET_ID_MINIO_CACHE_TTL_SECONDS =
"10410-minio-cache-ttl-seconds";
@GuiWidgetElement(
id = WIDGET_ID_MINIO_DESCRIPTION,
@@ -122,8 +123,18 @@ public class MinioMeta extends HopMetadataBase implements
Serializable, IHopMeta
@HopMetadataProperty
private String partSize;
+ @GuiWidgetElement(
+ id = WIDGET_ID_MINIO_CACHE_TTL_SECONDS,
+ parentId = MinioMetaEditor.GUI_WIDGETS_PARENT_ID,
+ type = GuiElementType.TEXT,
+ label = "i18n:org.apache.hop.vfs.minio.metadata:MinioVFS.CacheTtl.Label",
+ toolTip =
"i18n:org.apache.hop.vfs.minio.metadata:MinioVFS.CacheTtl.Description")
+ @HopMetadataProperty
+ private String cacheTtlSeconds;
+
  public MinioMeta() {
    // Defaults: 5 MiB object part size and a 5-second folder-listing cache TTL.
    this.partSize = Integer.toString(5 * 1024 * 1024);
    this.cacheTtlSeconds = "5";
  }
}
diff --git
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/util/MinioPipedOutputStream.java
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/util/MinioPipedOutputStream.java
index e2a1c5f35a..88f5369ba5 100644
---
a/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/util/MinioPipedOutputStream.java
+++
b/plugins/tech/minio/src/main/java/org/apache/hop/vfs/minio/util/MinioPipedOutputStream.java
@@ -84,6 +84,8 @@ public class MinioPipedOutputStream extends PipedOutputStream
{
@Override
public void close() throws IOException {
+ // Start upload even when no bytes were written (0-byte file), so
PutObject is still executed
+ initializeWrite();
super.close();
if (initialized && isBlockedUntilDone()) {
while (!result.get()) {
diff --git
a/plugins/tech/minio/src/main/resources/org/apache/hop/vfs/minio/metadata/messages/messages_en_US.properties
b/plugins/tech/minio/src/main/resources/org/apache/hop/vfs/minio/metadata/messages/messages_en_US.properties
index 0925b95080..0bf06a5184 100644
---
a/plugins/tech/minio/src/main/resources/org/apache/hop/vfs/minio/metadata/messages/messages_en_US.properties
+++
b/plugins/tech/minio/src/main/resources/org/apache/hop/vfs/minio/metadata/messages/messages_en_US.properties
@@ -34,3 +34,5 @@ MinioVFS.Region.Label = Region
MinioVFS.Region.Description = Optionally, the MinIO region to use
MinioVFS.PartSize.Label = Object part size
MinioVFS.PartSize.Description = Optionally, the MinIO default object part size
+MinioVFS.CacheTtl.Label = Cache TTL (seconds)
+MinioVFS.CacheTtl.Description = How long folder listing results are cached, in seconds. The default is 5 seconds; if this field is left empty, a built-in default of 10 seconds is used.
diff --git
a/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioFileSystemTest.java
b/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioFileSystemTest.java
index 9d8fab1949..63fdba45c2 100644
---
a/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioFileSystemTest.java
+++
b/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioFileSystemTest.java
@@ -20,8 +20,12 @@ package org.apache.hop.vfs.minio;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.time.Instant;
+import java.util.LinkedHashMap;
+import java.util.Map;
import org.apache.commons.vfs2.Capability;
import org.apache.commons.vfs2.FileSystemOptions;
import org.apache.commons.vfs2.FileType;
@@ -236,4 +240,61 @@ class MinioFileSystemTest {
fileSystem.setPartSize(MinioFileProvider.DEFAULT_PART_SIZE);
assertEquals(5242880L, fileSystem.getPartSize(), "Default part size should
be 5MB");
}
+
  /** A child written via putListCache is readable back with matching type and size. */
  @Test
  void testPutAndGetListCache() {
    Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
    entries.put("path/file.txt", new MinioListCache.ChildInfo(FileType.FILE, 512, Instant.now()));

    fileSystem.putListCache("test-bucket", "path/", entries);

    MinioListCache.ChildInfo info =
        fileSystem.getFromListCache("test-bucket", "path/", "path/file.txt");
    assertNotNull(info, "Should retrieve cached entry");
    assertEquals(FileType.FILE, info.type);
    assertEquals(512, info.size);
  }
+
  /** Looking up a child that was never cached yields null. */
  @Test
  void testGetFromListCacheMiss() {
    MinioListCache.ChildInfo info =
        fileSystem.getFromListCache("test-bucket", "path/", "path/missing.txt");
    assertNull(info, "Should return null for cache miss");
  }
+
  /** invalidateListCache drops a previously cached listing for the exact bucket/prefix. */
  @Test
  void testInvalidateListCache() {
    Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
    entries.put("path/file.txt", new MinioListCache.ChildInfo(FileType.FILE, 10, Instant.now()));
    fileSystem.putListCache("test-bucket", "path/", entries);

    assertNotNull(fileSystem.getFromListCache("test-bucket", "path/", "path/file.txt"));

    fileSystem.invalidateListCache("test-bucket", "path/");
    assertNull(fileSystem.getFromListCache("test-bucket", "path/", "path/file.txt"));
  }
+
  /** invalidateListCacheForParentOf derives the parent prefix from a child key and drops it. */
  @Test
  void testInvalidateListCacheForParentOf() {
    Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
    entries.put("dir/file.txt", new MinioListCache.ChildInfo(FileType.FILE, 10, Instant.now()));
    fileSystem.putListCache("test-bucket", "dir/", entries);

    assertNotNull(fileSystem.getFromListCache("test-bucket", "dir/", "dir/file.txt"));

    fileSystem.invalidateListCacheForParentOf("test-bucket", "dir/file.txt");
    assertNull(fileSystem.getFromListCache("test-bucket", "dir/", "dir/file.txt"));
  }
+
  /** Each file system instance owns its own list cache; entries do not leak across instances. */
  @Test
  void testListCacheIsolatedBetweenInstances() {
    MinioFileSystem fs2 = new MinioFileSystem(rootName, new FileSystemOptions());

    Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
    entries.put("key", new MinioListCache.ChildInfo(FileType.FILE, 10, Instant.now()));
    fileSystem.putListCache("bucket", "prefix/", entries);

    assertNotNull(fileSystem.getFromListCache("bucket", "prefix/", "key"));
    assertNull(fs2.getFromListCache("bucket", "prefix/", "key"));
  }
}
diff --git
a/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioListCacheTest.java
b/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioListCacheTest.java
new file mode 100644
index 0000000000..6de0bc5e65
--- /dev/null
+++
b/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioListCacheTest.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hop.vfs.minio;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import java.time.Instant;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import org.apache.commons.vfs2.FileType;
+import org.junit.jupiter.api.Test;
+
+class MinioListCacheTest {
+
+ @Test
+ void testDefaultTtl() {
+ assertEquals(10_000L, MinioListCache.DEFAULT_TTL_MS);
+ }
+
+ @Test
+ void testDefaultConstructorUsesTtl() {
+ MinioListCache cache = new MinioListCache();
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new MinioListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testPutAndGet() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put(
+ "path/to/file.txt", new MinioListCache.ChildInfo(FileType.FILE, 1024,
Instant.now()));
+ entries.put("path/to/sub/", new MinioListCache.ChildInfo(FileType.FOLDER,
0, Instant.EPOCH));
+
+ cache.put("mybucket", "path/to/", entries);
+
+ MinioListCache.ChildInfo fileInfo = cache.get("mybucket", "path/to/",
"path/to/file.txt");
+ assertNotNull(fileInfo);
+ assertEquals(FileType.FILE, fileInfo.type);
+ assertEquals(1024, fileInfo.size);
+
+ MinioListCache.ChildInfo folderInfo = cache.get("mybucket", "path/to/",
"path/to/sub/");
+ assertNotNull(folderInfo);
+ assertEquals(FileType.FOLDER, folderInfo.type);
+ assertEquals(0, folderInfo.size);
+ }
+
+ @Test
+ void testGetMissingKey() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put(
+ "path/to/file.txt", new MinioListCache.ChildInfo(FileType.FILE, 100,
Instant.now()));
+ cache.put("mybucket", "path/to/", entries);
+
+ assertNull(cache.get("mybucket", "path/to/", "path/to/other.txt"));
+ }
+
+ @Test
+ void testGetMissingBucket() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ assertNull(cache.get("nonexistent", "path/", "path/file"));
+ }
+
+ @Test
+ void testExpiry() throws InterruptedException {
+ MinioListCache cache = new MinioListCache(50L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new MinioListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+
+ Thread.sleep(100);
+
+ assertNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testInvalidate() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new MinioListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+ cache.invalidate("bucket", "prefix/");
+ assertNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testInvalidateParentOf() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("dir/file.txt", new MinioListCache.ChildInfo(FileType.FILE,
10, Instant.now()));
+ cache.put("bucket", "dir/", entries);
+
+ assertNotNull(cache.get("bucket", "dir/", "dir/file.txt"));
+ cache.invalidateParentOf("bucket", "dir/file.txt");
+ assertNull(cache.get("bucket", "dir/", "dir/file.txt"));
+ }
+
+ @Test
+ void testPutEmptyMap() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ cache.put("bucket", "prefix/", new LinkedHashMap<>());
+ assertNull(cache.get("bucket", "prefix/", "anything"));
+ }
+
+ @Test
+ void testPutNull() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ cache.put("bucket", "prefix/", null);
+ assertNull(cache.get("bucket", "prefix/", "anything"));
+ }
+
+ @Test
+ void testParentPrefix() {
+ assertEquals("path/to/", MinioListCache.parentPrefix("path/to/file"));
+ assertEquals("", MinioListCache.parentPrefix("file"));
+ assertEquals("", MinioListCache.parentPrefix(""));
+ assertEquals("", MinioListCache.parentPrefix(null));
+ assertEquals("a/b/", MinioListCache.parentPrefix("a/b/c"));
+ }
+
+ @Test
+ void testParentPrefixWithTrailingSlash() {
+ assertEquals("path/to/", MinioListCache.parentPrefix("path/to/dir/"));
+ assertEquals("", MinioListCache.parentPrefix("topdir/"));
+ }
+
+ @Test
+ void testCustomTtl() {
+ MinioListCache cache = new MinioListCache(5_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("key", new MinioListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ cache.put("bucket", "prefix/", entries);
+ assertNotNull(cache.get("bucket", "prefix/", "key"));
+ }
+
+ @Test
+ void testDifferentBucketsSamePrefix() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries1 = new LinkedHashMap<>();
+ entries1.put("prefix/a", new MinioListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ Map<String, MinioListCache.ChildInfo> entries2 = new LinkedHashMap<>();
+ entries2.put("prefix/b", new MinioListCache.ChildInfo(FileType.FILE, 20,
Instant.now()));
+
+ cache.put("bucket1", "prefix/", entries1);
+ cache.put("bucket2", "prefix/", entries2);
+
+ assertNotNull(cache.get("bucket1", "prefix/", "prefix/a"));
+ assertNull(cache.get("bucket1", "prefix/", "prefix/b"));
+ assertNotNull(cache.get("bucket2", "prefix/", "prefix/b"));
+ assertNull(cache.get("bucket2", "prefix/", "prefix/a"));
+ }
+
+ @Test
+ void testInvalidateNonExistentEntry() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ cache.invalidate("nonexistent", "prefix/");
+ cache.invalidateParentOf("nonexistent", "some/key");
+ }
+
+ @Test
+ void testChildInfoFields() {
+ Instant now = Instant.now();
+ MinioListCache.ChildInfo info = new
MinioListCache.ChildInfo(FileType.FILE, 42, now);
+ assertEquals(FileType.FILE, info.type);
+ assertEquals(42, info.size);
+ assertEquals(now, info.lastModified);
+ }
+
+ @Test
+ void testChildInfoFolder() {
+ MinioListCache.ChildInfo info = new
MinioListCache.ChildInfo(FileType.FOLDER, 0, Instant.EPOCH);
+ assertEquals(FileType.FOLDER, info.type);
+ assertEquals(0, info.size);
+ assertEquals(Instant.EPOCH, info.lastModified);
+ }
+
+ @Test
+ void testPutOverwritesPreviousEntry() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries1 = new LinkedHashMap<>();
+ entries1.put("key", new MinioListCache.ChildInfo(FileType.FILE, 100,
Instant.now()));
+ cache.put("bucket", "prefix/", entries1);
+
+ assertEquals(100, cache.get("bucket", "prefix/", "key").size);
+
+ Map<String, MinioListCache.ChildInfo> entries2 = new LinkedHashMap<>();
+ entries2.put("key", new MinioListCache.ChildInfo(FileType.FILE, 200,
Instant.now()));
+ cache.put("bucket", "prefix/", entries2);
+
+ assertEquals(200, cache.get("bucket", "prefix/", "key").size);
+ }
+
+ @Test
+ void testInvalidateOnlyAffectsTargetPrefix() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries1 = new LinkedHashMap<>();
+ entries1.put("a/file", new MinioListCache.ChildInfo(FileType.FILE, 10,
Instant.now()));
+ Map<String, MinioListCache.ChildInfo> entries2 = new LinkedHashMap<>();
+ entries2.put("b/file", new MinioListCache.ChildInfo(FileType.FILE, 20,
Instant.now()));
+
+ cache.put("bucket", "a/", entries1);
+ cache.put("bucket", "b/", entries2);
+
+ cache.invalidate("bucket", "a/");
+ assertNull(cache.get("bucket", "a/", "a/file"));
+ assertNotNull(cache.get("bucket", "b/", "b/file"));
+ }
+
+ @Test
+ void testMultipleChildrenInSamePrefix() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("dir/file1.txt", new MinioListCache.ChildInfo(FileType.FILE,
100, Instant.now()));
+ entries.put("dir/file2.txt", new MinioListCache.ChildInfo(FileType.FILE,
200, Instant.now()));
+ entries.put("dir/sub/", new MinioListCache.ChildInfo(FileType.FOLDER, 0,
Instant.EPOCH));
+
+ cache.put("bucket", "dir/", entries);
+
+ assertNotNull(cache.get("bucket", "dir/", "dir/file1.txt"));
+ assertNotNull(cache.get("bucket", "dir/", "dir/file2.txt"));
+ assertNotNull(cache.get("bucket", "dir/", "dir/sub/"));
+ assertEquals(100, cache.get("bucket", "dir/", "dir/file1.txt").size);
+ assertEquals(200, cache.get("bucket", "dir/", "dir/file2.txt").size);
+ assertEquals(FileType.FOLDER, cache.get("bucket", "dir/",
"dir/sub/").type);
+ }
+
+ @Test
+ void testRootPrefixEmpty() {
+ MinioListCache cache = new MinioListCache(60_000L);
+ Map<String, MinioListCache.ChildInfo> entries = new LinkedHashMap<>();
+ entries.put("topfile.txt", new MinioListCache.ChildInfo(FileType.FILE, 50,
Instant.now()));
+ cache.put("bucket", "", entries);
+
+ assertNotNull(cache.get("bucket", "", "topfile.txt"));
+ }
+}
diff --git
a/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioMetaTest.java
b/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioMetaTest.java
index 78bb8e7a40..e408d604e9 100644
---
a/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioMetaTest.java
+++
b/plugins/tech/minio/src/test/java/org/apache/hop/vfs/minio/MinioMetaTest.java
@@ -40,6 +40,7 @@ class MinioMetaTest {
void testDefaultConstructor() {
assertNotNull(minioMeta, "MinioMeta should not be null");
assertEquals("5242880", minioMeta.getPartSize(), "Default part size should
be 5MB");
+ assertEquals("5", minioMeta.getCacheTtlSeconds(), "Default cache TTL
should be 5 seconds");
}
@Test
@@ -101,6 +102,16 @@ class MinioMetaTest {
assertEquals(partSize, minioMeta.getPartSize(), "Part size should be set
correctly");
}
  /** The cache TTL is held as a String and returned verbatim by the getter. */
  @Test
  void testCacheTtlSeconds() {
    String cacheTtlSeconds = "30";
    minioMeta.setCacheTtlSeconds(cacheTtlSeconds);
    assertEquals(
        cacheTtlSeconds,
        minioMeta.getCacheTtlSeconds(),
        "Cache TTL (seconds) should be set correctly");
  }
+
@Test
void testAllProperties() {
String description = "Test MinIO connection";
@@ -111,6 +122,7 @@ class MinioMetaTest {
String secretKey = "test-secret-key";
String region = "us-west-2";
String partSize = "10485760";
+ String cacheTtlSeconds = "60";
minioMeta.setDescription(description);
minioMeta.setEndPointHostname(hostname);
@@ -120,6 +132,7 @@ class MinioMetaTest {
minioMeta.setSecretKey(secretKey);
minioMeta.setRegion(region);
minioMeta.setPartSize(partSize);
+ minioMeta.setCacheTtlSeconds(cacheTtlSeconds);
assertEquals(description, minioMeta.getDescription(), "Description should
be set correctly");
assertEquals(
@@ -130,6 +143,10 @@ class MinioMetaTest {
assertEquals(secretKey, minioMeta.getSecretKey(), "Secret key should be
set correctly");
assertEquals(region, minioMeta.getRegion(), "Region should be set
correctly");
assertEquals(partSize, minioMeta.getPartSize(), "Part size should be set
correctly");
+ assertEquals(
+ cacheTtlSeconds,
+ minioMeta.getCacheTtlSeconds(),
+ "Cache TTL (seconds) should be set correctly");
}
@Test
@@ -141,6 +158,7 @@ class MinioMetaTest {
minioMeta.setSecretKey(null);
minioMeta.setRegion(null);
minioMeta.setPartSize(null);
+ minioMeta.setCacheTtlSeconds(null);
assertNull(minioMeta.getDescription(), "Description should be null");
assertNull(minioMeta.getEndPointHostname(), "EndPoint hostname should be
null");
@@ -149,5 +167,6 @@ class MinioMetaTest {
assertNull(minioMeta.getSecretKey(), "Secret key should be null");
assertNull(minioMeta.getRegion(), "Region should be null");
assertNull(minioMeta.getPartSize(), "Part size should be null");
+ assertNull(minioMeta.getCacheTtlSeconds(), "Cache TTL should be null");
}
}