This is an automated email from the ASF dual-hosted git repository.
eamonford pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git
The following commit(s) were added to refs/heads/dev by this push:
new 7e04251 SDAP-300: Fix bug that prevented collection manager from
seeing files in a directory when the path had no glob-style wildcard character.
Also add back support for scanning dirs recursively. (#27)
7e04251 is described below
commit 7e042510eea58a423aad4d4634d69ae01ce73146
Author: Eamon Ford <[email protected]>
AuthorDate: Tue Jan 5 10:05:04 2021 -0800
SDAP-300: Fix bug that prevented collection manager from seeing files in a
directory when the path had no glob-style wildcard character. Also add back
support for scanning dirs recursively. (#27)
Co-authored-by: Eamon Ford <[email protected]>
---
.../collection_manager/entities/Collection.py | 7 ++++---
.../collection_manager/services/CollectionWatcher.py | 14 +++++++++++---
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/collection_manager/collection_manager/entities/Collection.py b/collection_manager/collection_manager/entities/Collection.py
index 7a45b66..389e135 100644
--- a/collection_manager/collection_manager/entities/Collection.py
+++ b/collection_manager/collection_manager/entities/Collection.py
@@ -1,11 +1,12 @@
import os
-from urllib.parse import urlparse
+import pathlib
from dataclasses import dataclass
from datetime import datetime
+from enum import Enum
from fnmatch import fnmatch
from glob import glob
from typing import List, Optional
-from enum import Enum
+from urllib.parse import urlparse
from collection_manager.entities.exceptions import MissingValueCollectionError
@@ -68,6 +69,6 @@ class Collection:
raise IsADirectoryError()
if os.path.isdir(self.path):
- return os.path.dirname(file_path) == self.path
+ return pathlib.Path(self.path) in pathlib.Path(file_path).parents
else:
return fnmatch(file_path, self.path)
diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py
index b1aaf4e..68b013a 100644
--- a/collection_manager/collection_manager/services/CollectionWatcher.py
+++ b/collection_manager/collection_manager/services/CollectionWatcher.py
@@ -116,11 +116,16 @@ class CollectionWatcher:
logger.info(f"Scanning files for {len(collections)} collections...")
start = time.perf_counter()
for collection in collections:
- for granule_path in glob(collection.path, recursive=True):
+ for granule_path in self._get_files_at_path(collection.path):
modified_time = int(os.path.getmtime(granule_path))
await self._granule_updated_callback(granule_path, modified_time, collection)
logger.info(f"Finished scanning files in {time.perf_counter() - start} seconds.")
+ def _get_files_at_path(self, path: str) -> List[str]:
+ if os.path.isfile(path):
+ return [path]
+ return [f for f in glob(path + '/**', recursive=True) if os.path.isfile(f)]
+
async def _reload_and_reschedule(self):
try:
updated_collections = self._get_updated_collections()
@@ -191,11 +196,14 @@ class _GranuleEventHandler(FileSystemEventHandler):
def on_created(self, event):
super().on_created(event)
- self._handle_event(event)
+ if isinstance(event, S3Event) or not event.is_directory:
+ self._handle_event(event)
def on_modified(self, event):
super().on_modified(event)
- self._handle_event(event)
+
+ if isinstance(event, S3Event) or not event.is_directory:
+ self._handle_event(event)
def _handle_event(self, event):
path = event.src_path