This is an automated email from the ASF dual-hosted git repository. tloubrieu pushed a commit to branch ascending_latitudes in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
commit 7448280e070b28efbf99ec49b3c2954aac052c6e Author: Eamon Ford <[email protected]> AuthorDate: Tue Jan 5 10:05:04 2021 -0800 SDAP-300: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively. (#27) Co-authored-by: Eamon Ford <[email protected]> --- .../collection_manager/entities/Collection.py | 7 ++++--- .../collection_manager/services/CollectionWatcher.py | 14 +++++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/collection_manager/collection_manager/entities/Collection.py b/collection_manager/collection_manager/entities/Collection.py index 7a45b66..389e135 100644 --- a/collection_manager/collection_manager/entities/Collection.py +++ b/collection_manager/collection_manager/entities/Collection.py @@ -1,11 +1,12 @@ import os -from urllib.parse import urlparse +import pathlib from dataclasses import dataclass from datetime import datetime +from enum import Enum from fnmatch import fnmatch from glob import glob from typing import List, Optional -from enum import Enum +from urllib.parse import urlparse from collection_manager.entities.exceptions import MissingValueCollectionError @@ -68,6 +69,6 @@ class Collection: raise IsADirectoryError() if os.path.isdir(self.path): - return os.path.dirname(file_path) == self.path + return pathlib.Path(self.path) in pathlib.Path(file_path).parents else: return fnmatch(file_path, self.path) diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py index b1aaf4e..68b013a 100644 --- a/collection_manager/collection_manager/services/CollectionWatcher.py +++ b/collection_manager/collection_manager/services/CollectionWatcher.py @@ -116,11 +116,16 @@ class CollectionWatcher: logger.info(f"Scanning files for {len(collections)} collections...") start = time.perf_counter() for collection in collections: - for granule_path in glob(collection.path, recursive=True): + for granule_path in self._get_files_at_path(collection.path): modified_time = int(os.path.getmtime(granule_path)) await self._granule_updated_callback(granule_path, modified_time, collection) logger.info(f"Finished scanning files in {time.perf_counter() - start} seconds.") + def _get_files_at_path(self, path: str) -> List[str]: + if os.path.isfile(path): + return [path] + return [f for f in glob(path + '/**', recursive=True) if os.path.isfile(f)] + async def _reload_and_reschedule(self): try: updated_collections = self._get_updated_collections() @@ -191,11 +196,14 @@ class _GranuleEventHandler(FileSystemEventHandler): def on_created(self, event): super().on_created(event) - self._handle_event(event) + if isinstance(event, S3Event) or not event.is_directory: + self._handle_event(event) def on_modified(self, event): super().on_modified(event) - self._handle_event(event) + + if isinstance(event, S3Event) or not event.is_directory: + self._handle_event(event) def _handle_event(self, event): path = event.src_path
