This is an automated email from the ASF dual-hosted git repository.

machristie pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/airavata-django-portal.git

commit a248cbd7d37730547f5827c43bf371861ca2a8e3
Author: Marcus Christie <[email protected]>
AuthorDate: Mon Apr 17 15:27:27 2023 -0400

    AIRAVATA-3694 Basic archive_user_data management command
---
 django_airavata/apps/admin/management/__init__.py  |   0
 .../apps/admin/management/commands/__init__.py     |   0
 .../admin/management/commands/archive_user_data.py | 123 +++++++++++++++++++++
 .../apps/admin/migrations/0001_initial.py          |  35 ++++++
 django_airavata/apps/admin/models.py               |  15 +++
 django_airavata/settings.py                        |   4 +
 django_airavata/settings_local.py.sample           |   4 +
 7 files changed, 181 insertions(+)

diff --git a/django_airavata/apps/admin/management/__init__.py b/django_airavata/apps/admin/management/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/django_airavata/apps/admin/management/commands/__init__.py b/django_airavata/apps/admin/management/commands/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/django_airavata/apps/admin/management/commands/archive_user_data.py b/django_airavata/apps/admin/management/commands/archive_user_data.py
new file mode 100644
index 00000000..0d45e931
--- /dev/null
+++ b/django_airavata/apps/admin/management/commands/archive_user_data.py
@@ -0,0 +1,123 @@
+import datetime
+import os
+import shutil
+import tarfile
+import tempfile
+from pathlib import Path
+from typing import Iterator
+
+from django.conf import settings
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from django.utils import timezone
+
+from django_airavata.apps.admin import models
+
+
+class Command(BaseCommand):
+    help = "Create an archive of user data directories and optionally clean them up"
+
+    def add_arguments(self, parser):
+        parser.add_argument('--dry-run',
+                            action='store_true',
+                            help="Print the list of files/directories that would be archived then exit",
+                            default=False)
+
+    def handle(self, *args, **options):
+        max_age_setting = getattr(settings, "GATEWAY_USER_DATA_ARCHIVE_MAX_AGE", None)
+        if max_age_setting is None:
+            raise CommandError("Setting GATEWAY_USER_DATA_ARCHIVE_MAX_AGE is not configured")
+
+        max_age = timezone.now() - datetime.timedelta(**max_age_setting)
+        entries_to_archive = self.get_archive_entries(older_than=max_age)
+        gateway_id = settings.GATEWAY_ID
+
+        archive_directory = Path(settings.GATEWAY_USER_DATA_ARCHIVE_DIRECTORY)
+        archive_directory.mkdir(exist_ok=True)
+
+        with tempfile.TemporaryDirectory(dir=archive_directory) as tmpdir:
+            archive_basename = f"archive_{gateway_id}_older_than_{max_age.strftime('%Y-%m-%d-%H-%M-%S')}"
+            archive_list_filename = f"{archive_basename}.txt"
+            archive_list_filepath = os.path.join(tmpdir, archive_list_filename)
+            with open(archive_list_filepath, "wt") as archive_list_file:
+                for entry in entries_to_archive:
+                    archive_list_file.write(f"{entry.path}\n")
+
+            # if dry run, just print file and exit
+            if options['dry_run']:
+                self.stdout.write(f"DRY RUN: printing {archive_list_filename}, then exiting")
+                with open(os.path.join(tmpdir, archive_list_filename)) as archive_list_file:
+                    for line in archive_list_file:
+                        self.stdout.write(line)
+                self.stdout.write(self.style.SUCCESS("DRY RUN: exiting now"))
+                return
+
+            # otherwise, generate a tarball in tmpdir
+            archive_tarball_filename = f"{archive_basename}.tgz"
+            archive_tarball_filepath = os.path.join(tmpdir, archive_tarball_filename)
+            with tarfile.open(archive_tarball_filepath, "w:gz") as tarball:
+                with open(os.path.join(tmpdir, archive_list_filename)) as archive_list_file:
+                    for line in archive_list_file:
+                        tarball.add(line.strip())
+            self.stdout.write(self.style.SUCCESS(f"Created tarball: {archive_tarball_filename}"))
+
+            # Move the archive files into the final destination
+            shutil.move(archive_list_filepath, archive_directory / archive_list_filename)
+            shutil.move(archive_tarball_filepath, archive_directory / archive_tarball_filename)
+
+        with transaction.atomic():
+            user_data_archive = models.UserDataArchive(
+                archive_name=archive_tarball_filename,
+                archive_path=os.fspath(archive_directory / archive_list_filename),
+                max_modification_time=max_age)
+            user_data_archive.save()
+            # delete archived entries
+            with open(archive_directory / archive_list_filename) as archive_list_file:
+                for archive_path in archive_list_file:
+                    archive_path = archive_path.strip()
+                    if os.path.isfile(archive_path):
+                        os.remove(archive_path)
+                    else:
+                        shutil.rmtree(archive_path)
+                    archive_entry = models.UserDataArchiveEntry(user_data_archive=user_data_archive, entry_path=archive_path)
+                    archive_entry.save()
+
+        self.stdout.write(self.style.SUCCESS("Successfully removed archived user data"))
+
+    def get_archive_entries(self, older_than: datetime.datetime) -> Iterator[os.DirEntry]:
+
+        GATEWAY_USER_DIR = settings.USER_STORAGES['default']['OPTIONS']['directory']
+
+        with os.scandir(GATEWAY_USER_DIR) as user_dirs:
+            for user_dir_entry in user_dirs:
+                # Skip over any files (shouldn't be any but who knows)
+                if not user_dir_entry.is_dir():
+                    continue
+                # Skip over shared directories
+                if self._is_shared_directory(user_dir_entry):
+                    continue
+                with os.scandir(user_dir_entry.path) as project_dirs:
+                    for project_dir_entry in project_dirs:
+                        yield from self._scan_project_dir_for_archive_entries(
+                            project_dir_entry=project_dir_entry,
+                            older_than=older_than)
+
+    def _scan_project_dir_for_archive_entries(self, project_dir_entry: os.DirEntry, older_than: datetime.datetime) -> Iterator[os.DirEntry]:
+        # archive files here but not directories
+        if project_dir_entry.is_file() and project_dir_entry.stat().st_mtime < older_than.timestamp():
+            yield project_dir_entry
+        # Skip over shared directories
+        if project_dir_entry.is_dir() and not self._is_shared_directory(project_dir_entry):
+            with os.scandir(project_dir_entry.path) as experiment_dirs:
+                for experiment_dir_entry in experiment_dirs:
+                    if experiment_dir_entry.stat().st_mtime < older_than.timestamp():
+                        yield experiment_dir_entry
+
+    def _is_shared_directory(self, dir_entry: os.DirEntry) -> bool:
+        if not dir_entry.is_dir():
+            return False
+        shared_dirs = getattr(settings, "GATEWAY_DATA_SHARED_DIRECTORIES", {})
+        for shared_dir in shared_dirs.values():
+            if os.path.samefile(dir_entry.path, shared_dir["path"]):
+                return True
+        return False
diff --git a/django_airavata/apps/admin/migrations/0001_initial.py b/django_airavata/apps/admin/migrations/0001_initial.py
new file mode 100644
index 00000000..6223ce73
--- /dev/null
+++ b/django_airavata/apps/admin/migrations/0001_initial.py
@@ -0,0 +1,35 @@
+# Generated by Django 3.2.18 on 2023-04-17 14:10
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = [
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='UserDataArchive',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('created_date', models.DateTimeField(auto_now_add=True)),
+                ('updated_date', models.DateTimeField(auto_now=True)),
+                ('archive_name', models.CharField(max_length=255)),
+                ('archive_path', models.TextField()),
+                ('rolled_back', models.BooleanField(default=False)),
+                ('max_modification_time', models.DateTimeField()),
+            ],
+        ),
+        migrations.CreateModel(
+            name='UserDataArchiveEntry',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('entry_path', models.TextField()),
+                ('user_data_archive', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='django_airavata_admin.userdataarchive')),
+            ],
+        ),
+    ]
diff --git a/django_airavata/apps/admin/models.py b/django_airavata/apps/admin/models.py
index 35e0d648..2471f9af 100644
--- a/django_airavata/apps/admin/models.py
+++ b/django_airavata/apps/admin/models.py
@@ -1,2 +1,17 @@
+from django.db import models
 
 # Create your models here.
+
+
+class UserDataArchive(models.Model):
+    created_date = models.DateTimeField(auto_now_add=True)
+    updated_date = models.DateTimeField(auto_now=True)
+    archive_name = models.CharField(max_length=255)
+    archive_path = models.TextField()
+    rolled_back = models.BooleanField(default=False)
+    max_modification_time = models.DateTimeField()
+
+
+class UserDataArchiveEntry(models.Model):
+    user_data_archive = models.ForeignKey(UserDataArchive, on_delete=models.CASCADE)
+    entry_path = models.TextField()
diff --git a/django_airavata/settings.py b/django_airavata/settings.py
index 01c6b144..d041d43f 100644
--- a/django_airavata/settings.py
+++ b/django_airavata/settings.py
@@ -203,6 +203,10 @@ TUS_ENDPOINT = None
 # Override and set to the directory where tus uploads will be stored
 TUS_DATA_DIR = None
 
+# TODO: document, timedelta arguments
+GATEWAY_USER_DATA_ARCHIVE_MAX_AGE = None
+GATEWAY_USER_DATA_ARCHIVE_DIRECTORY = "/tmp"
+
 # Legacy (PGA) Portal link - provide a link to the legacy portal
 PGA_URL = None
 
diff --git a/django_airavata/settings_local.py.sample b/django_airavata/settings_local.py.sample
index ba7410ad..1245000c 100644
--- a/django_airavata/settings_local.py.sample
+++ b/django_airavata/settings_local.py.sample
@@ -115,6 +115,10 @@ PROFILE_SERVICE_HOST = AIRAVATA_API_HOST
 PROFILE_SERVICE_PORT = 8962
 PROFILE_SERVICE_SECURE = False
 
+# Gateway user data archive configuration. User data can be periodically
+# archived and deleted to free up storage space.
+# GATEWAY_USER_DATA_ARCHIVE_MAX_AGE = {'days': 90}
+# GATEWAY_USER_DATA_ARCHIVE_DIRECTORY = "/path/dir/where/to/copy/archives"
 
 # Portal settings
 PORTAL_TITLE = 'Django Airavata Gateway'

Reply via email to