This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git


The following commit(s) were added to refs/heads/dev by this push:
     new 7e04251  SDAP-300: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively. (#27)
7e04251 is described below

commit 7e042510eea58a423aad4d4634d69ae01ce73146
Author: Eamon Ford <[email protected]>
AuthorDate: Tue Jan 5 10:05:04 2021 -0800

    SDAP-300: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively. (#27)
    
    Co-authored-by: Eamon Ford <[email protected]>
---
 .../collection_manager/entities/Collection.py              |  7 ++++---
 .../collection_manager/services/CollectionWatcher.py       | 14 +++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/collection_manager/collection_manager/entities/Collection.py b/collection_manager/collection_manager/entities/Collection.py
index 7a45b66..389e135 100644
--- a/collection_manager/collection_manager/entities/Collection.py
+++ b/collection_manager/collection_manager/entities/Collection.py
@@ -1,11 +1,12 @@
 import os
-from urllib.parse import urlparse
+import pathlib
 from dataclasses import dataclass
 from datetime import datetime
+from enum import Enum
 from fnmatch import fnmatch
 from glob import glob
 from typing import List, Optional
-from enum import Enum
+from urllib.parse import urlparse
 
 from collection_manager.entities.exceptions import MissingValueCollectionError
 
@@ -68,6 +69,6 @@ class Collection:
                 raise IsADirectoryError()
 
             if os.path.isdir(self.path):
-                return os.path.dirname(file_path) == self.path
+                return pathlib.Path(self.path) in pathlib.Path(file_path).parents
             else:
                 return fnmatch(file_path, self.path)
diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py
index b1aaf4e..68b013a 100644
--- a/collection_manager/collection_manager/services/CollectionWatcher.py
+++ b/collection_manager/collection_manager/services/CollectionWatcher.py
@@ -116,11 +116,16 @@ class CollectionWatcher:
         logger.info(f"Scanning files for {len(collections)} collections...")
         start = time.perf_counter()
         for collection in collections:
-            for granule_path in glob(collection.path, recursive=True):
+            for granule_path in self._get_files_at_path(collection.path):
                 modified_time = int(os.path.getmtime(granule_path))
                 await self._granule_updated_callback(granule_path, modified_time, collection)
         logger.info(f"Finished scanning files in {time.perf_counter() - start} seconds.")
 
+    def _get_files_at_path(self, path: str) -> List[str]:
+        if os.path.isfile(path):
+            return [path]
+        return [f for f in glob(path + '/**', recursive=True) if os.path.isfile(f)]
+
     async def _reload_and_reschedule(self):
         try:
             updated_collections = self._get_updated_collections()
@@ -191,11 +196,14 @@ class _GranuleEventHandler(FileSystemEventHandler):
 
     def on_created(self, event):
         super().on_created(event)
-        self._handle_event(event)
+        if isinstance(event, S3Event) or not event.is_directory:
+            self._handle_event(event)
 
     def on_modified(self, event):
         super().on_modified(event)
-        self._handle_event(event)
+
+        if isinstance(event, S3Event) or not event.is_directory:
+            self._handle_event(event)
 
     def _handle_event(self, event):
         path = event.src_path

Reply via email to