This is an automated email from the ASF dual-hosted git repository.

davsclaus pushed a commit to branch fo
in repository https://gitbox.apache.org/repos/asf/camel.git

commit b4bfcce123937d292bfa2dcec3d481c56a0f5b7a
Author: Claus Ibsen <[email protected]>
AuthorDate: Sun Dec 29 12:33:51 2024 +0100

    CAMEL-17648: camel-file - Optimize file consumer when filtering file names.
---
 .../apache/camel/component/file/FileConsumer.java  | 135 +++++++++++++++++----
 .../camel/component/file/GenericFileConsumer.java  |  18 ++-
 2 files changed, 127 insertions(+), 26 deletions(-)

diff --git 
a/components/camel-file/src/main/java/org/apache/camel/component/file/FileConsumer.java
 
b/components/camel-file/src/main/java/org/apache/camel/component/file/FileConsumer.java
index 1cc3327ef1c..890231d9117 100644
--- 
a/components/camel-file/src/main/java/org/apache/camel/component/file/FileConsumer.java
+++ 
b/components/camel-file/src/main/java/org/apache/camel/component/file/FileConsumer.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Supplier;
 
 import org.apache.camel.Exchange;
 import org.apache.camel.Message;
@@ -39,6 +40,7 @@ import org.apache.camel.resume.ResumeStrategy;
 import org.apache.camel.support.resume.Resumables;
 import org.apache.camel.util.FileUtil;
 import org.apache.camel.util.ObjectHelper;
+import org.apache.camel.util.function.Suppliers;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -109,12 +111,11 @@ public class FileConsumer extends 
GenericFileConsumer<File> implements ResumeAwa
             }
 
             // creates a generic file
-            GenericFile<File> gf
-                    = asGenericFile(endpointPath, file, 
getEndpoint().getCharset(), getEndpoint().isProbeContentType());
+            Supplier<GenericFile<File>> gf = Suppliers.memorize(
+                    () -> asGenericFile(endpointPath, file, 
getEndpoint().getCharset(), getEndpoint().isProbeContentType()));
 
             if (resumeStrategy != null) {
-                final ResumeAdapter adapter = setupResumeStrategy(gf);
-
+                final ResumeAdapter adapter = setupResumeStrategy(gf.get());
                 if (adapter instanceof DirectoryEntriesResumeAdapter 
directoryEntriesResumeAdapter) {
                     LOG.trace("Running the resume process for file {}", file);
                     if (directoryEntriesResumeAdapter.resume(file)) {
@@ -131,7 +132,8 @@ public class FileConsumer extends GenericFileConsumer<File> 
implements ResumeAwa
         return false;
     }
 
-    private boolean processEntry(List<GenericFile<File>> fileList, int depth, 
File file, GenericFile<File> gf, File[] files) {
+    private boolean processEntry(
+            List<GenericFile<File>> fileList, int depth, File file, 
Supplier<GenericFile<File>> gf, File[] files) {
         if (file.isDirectory()) {
             return processDirectoryEntry(fileList, depth, file, gf, files);
         } else {
@@ -141,37 +143,128 @@ public class FileConsumer extends 
GenericFileConsumer<File> implements ResumeAwa
         return false;
     }
 
-    private void processFileEntry(List<GenericFile<File>> fileList, int depth, 
File file, GenericFile<File> gf, File[] files) {
+    private void processFileEntry(
+            List<GenericFile<File>> fileList, int depth, File file, 
Supplier<GenericFile<File>> gf, File[] files) {
         // Windows can report false to a file on a share so regard it
         // always as a file (if it is not a directory)
-        if (depth >= endpoint.minDepth && isValidFile(gf, false, files)) {
-            LOG.trace("Adding valid file: {}", file);
-            // matched file so add
-            if (extendedAttributes != null) {
-                Path path = file.toPath();
-                Map<String, Object> allAttributes = new HashMap<>();
-                for (String attribute : extendedAttributes) {
-                    readAttributes(file, path, allAttributes, attribute);
+        if (depth >= endpoint.minDepth) {
+            boolean valid = isValidFile(gf, file, false, files);
+            if (valid) {
+                LOG.trace("Adding valid file: {}", file);
+                if (extendedAttributes != null) {
+                    Path path = file.toPath();
+                    Map<String, Object> allAttributes = new HashMap<>();
+                    for (String attribute : extendedAttributes) {
+                        readAttributes(file, path, allAttributes, attribute);
+                    }
+                    gf.get().setExtendedAttributes(allAttributes);
                 }
+                fileList.add(gf.get());
+            }
+        }
+    }
 
-                gf.setExtendedAttributes(allAttributes);
+    private boolean processDirectoryEntry(
+            List<GenericFile<File>> fileList, int depth, File file, 
Supplier<GenericFile<File>> gf, File[] files) {
+        if (endpoint.isRecursive() && depth < endpoint.getMaxDepth()) {
+            boolean valid = isValidFile(gf, file, true, files);
+            if (valid) {
+                boolean canPollMore = pollDirectory(file, fileList, depth);
+                return !canPollMore;
             }
+        }
+        return false;
+    }
 
-            fileList.add(gf);
+    @Override
+    protected boolean isPreMatched() {
+        // the camel-file is optimized for pre-matching
+        return true;
+    }
+
+    protected boolean isValidFile(Supplier<GenericFile<File>> gf, File file, 
boolean isDirectory, File[] files) {
+        if (!isMatched(file, isDirectory, files)) {
+            LOG.trace("File did not match. Will skip this file: {}", file);
+            return false;
         }
+        // optimized check passed, so continue with the general check
+        return super.isValidFile(gf.get(), isDirectory, files);
     }
 
-    private boolean processDirectoryEntry(
-            List<GenericFile<File>> fileList, int depth, File file, 
GenericFile<File> gf, File[] files) {
-        if (endpoint.isRecursive() && depth < endpoint.getMaxDepth() && 
isValidFile(gf, true, files)) {
-            boolean canPollMore = pollDirectory(file, fileList, depth);
-            if (!canPollMore) {
+    /**
+     * Optimized validity check that uses java.io.File objects only, as 
creating the GenericFile object has overhead
+     * when polling from file systems that contain a lot of files.
+     */
+    private boolean isMatched(File file, boolean isDirectory, File[] files) {
+        String name = file.getName();
+
+        if (!isMatchedHiddenFile(file, isDirectory)) {
+            // folders/names starting with dot are always skipped (eg. ".", 
".camel",
+            // ".camelLock")
+            return false;
+        }
+
+        // lock files should be skipped
+        if (name.endsWith(FileComponent.DEFAULT_LOCK_FILE_POSTFIX)) {
+            return false;
+        }
+
+        // check if file matches inclusion/exclusion
+        if (!isDirectory && hasInclusionsOrExclusions(file, name)) {
+            return false;
+        }
+
+        // return true to allow default valid check to process
+        return true;
+    }
+
+    private boolean hasInclusionsOrExclusions(File file, String name) {
+        // exclude take precedence over include
+        if (endpoint.getExcludePattern() != null) {
+            if (endpoint.getExcludePattern().matcher(name).matches()) {
+                return true;
+            }
+        }
+        String fname = file.getName().toLowerCase();
+        if (endpoint.getExcludeExt() != null) {
+            if (hasExtExlusions(fname)) {
+                return true;
+            }
+        }
+        if (endpoint.getIncludePattern() != null) {
+            if (!endpoint.getIncludePattern().matcher(name).matches()) {
+                return true;
+            }
+        }
+        if (endpoint.getIncludeExt() != null) {
+            if (hasExtInclusions(fname)) {
                 return true;
             }
         }
         return false;
     }
 
+    protected boolean isMatchedHiddenFile(File file, boolean isDirectory) {
+        String name = file.getName();
+        if (isDirectory) {
+            if (!name.startsWith(".")) {
+                return true;
+            }
+            return getEndpoint().isIncludeHiddenDirs() && 
!FileConstants.DEFAULT_SUB_FOLDER.equals(name);
+        }
+
+        if (getEndpoint().isIncludeHiddenFiles()) {
+            return true;
+        } else {
+            // folders/names starting with dot are always skipped (eg. ".", 
".camel",
+            // ".camelLock")
+            if (name.startsWith(".")) {
+                return false;
+            }
+            return true;
+        }
+    }
+
     private ResumeAdapter setupResumeStrategy(GenericFile<File> gf) {
         ResumeAdapter adapter = resumeStrategy.getAdapter();
         LOG.trace("Checking the resume adapter: {}", adapter);
diff --git 
a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConsumer.java
 
b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConsumer.java
index 87d51223983..6cbe9d2ee43 100644
--- 
a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConsumer.java
+++ 
b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConsumer.java
@@ -585,8 +585,6 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
      * @return             <tt>true</tt> to include the file, <tt>false</tt> 
to skip it
      */
     protected boolean isValidFile(GenericFile<T> file, boolean isDirectory, 
T[] files) {
-        String absoluteFilePath = file.getAbsoluteFilePath();
-
         if (!isMatched(file, isDirectory, files)) {
             LOG.trace("File did not match. Will skip this file: {}", file);
             return false;
@@ -598,6 +596,7 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
         }
 
         // check if file is already in progress
+        String absoluteFilePath = file.getAbsoluteFilePath();
         if (endpoint.getInProgressRepository().contains(absoluteFilePath)) {
             if (LOG.isTraceEnabled()) {
                 LOG.trace("Skipping as file is already in progress: {}", 
file.getFileName());
@@ -661,6 +660,13 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
         return true;
     }
 
+    /**
+     * Whether the inclusion/exclusion check of isMatched has already been 
done up front, as the camel-file component does.
+     */
+    protected boolean isPreMatched() {
+        return false;
+    }
+
     /**
      * Strategy to perform file matching based on endpoint configuration.
      * <p/>
@@ -679,6 +685,7 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
     protected boolean isMatched(GenericFile<T> file, boolean isDirectory, T[] 
files) {
         String name = file.getFileNameOnly();
 
+        // camel-file pre-checks this too, but it is not guarded by isPreMatched and runs again here
         if (!isMatchedHiddenFile(file, isDirectory)) {
             // folders/names starting with dot is always skipped (eg. ".", 
".camel",
             // ".camelLock")
@@ -717,7 +724,8 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
             return true;
         }
 
-        if (hasInclusionsOrExclusions(file, name)) {
+        // this has already been pre-checked
+        if (!isPreMatched() && hasInclusionsOrExclusions(file, name)) {
             return false;
         }
 
@@ -789,7 +797,7 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
         return false;
     }
 
-    private boolean hasExtInclusions(String fname) {
+    protected boolean hasExtInclusions(String fname) {
         boolean any = false;
         for (String include : includeExt) {
             any |= fname.endsWith("." + include);
@@ -800,7 +808,7 @@ public abstract class GenericFileConsumer<T> extends 
ScheduledBatchPollingConsum
         return false;
     }
 
-    private boolean hasExtExlusions(String fname) {
+    protected boolean hasExtExlusions(String fname) {
         for (String exclude : excludeExt) {
             if (fname.endsWith("." + exclude)) {
                 return true;

Reply via email to