This is an automated email from the ASF dual-hosted git repository.
yasithdev pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airavata-portals.git
The following commit(s) were added to refs/heads/main by this push:
new 470b23420 feat(portal): de-Thrift the data-product READ path to gRPC
(Track D, D4.1) (#186)
470b23420 is described below
commit 470b23420d4bbf0b1a63eca08799a113b0fbb63f
Author: Yasith Jayawardana <[email protected]>
AuthorDate: Tue Jun 9 00:25:28 2026 -0400
feat(portal): de-Thrift the data-product READ path to gRPC (Track D, D4.1)
(#186)
* wip(portal): add gRPC data-product read adapter (Track D, D4.1)
Foundation for the data-product read-path migration:
`grpc_adapters.data_product` and `_data_replica_location` map the protobuf
`DataProductModel` (from `research.get_data_product`) to the Thrift
attribute shape the `DataProductSerializer` reads, bridging the
DataProductType / ReplicaLocationCategory / ReplicaPersistentType enums by
NAME (proto and Thrift assign different ints) and supplying the
serializer's extra `modifiedTime` field.
WIP: not yet wired into the views. The remaining D4.1 work (repoint the 11
`getDataProduct` calls, rework the `DataProductSerializer` method fields off
`airavata_django_portal_sdk.user_storage`, add a byte-streaming download
endpoint) lands with the storage-facade helpers (D4.4). The adapter was
validated offline (field-for-field round-trip, including the enum
name-bridges).
* feat(portal): de-Thrift data-product READ path to gRPC (Track D, D4.1)
Repoint the 11 Thrift getDataProduct calls and the DataProductSerializer
off the legacy airavata_django_portal_sdk.user_storage (which internally
calls Thrift getDataProduct and hangs) onto the gRPC research/storage
facades.
- All getDataProduct -> research.get_data_product(uri) +
grpc_adapters.data_product: FullExperimentViewSet (inputs/outputs incl.
URI_COLLECTION), DataProductView.get/put, delete_file,
output_views._generate_data, workspace.create_experiment.
- DataProductSerializer method-fields derived without a backend call:
filesize <- productSize; isInputFileUpload <- first replica filePath
parent dir == "tmp"; downloadURL <- lazy portal URL to the new
download-file endpoint; userHasWriteAccess <- owner / shared-path+admin.
- New byte-streaming download endpoint (download-file): resolves the data
product, streams the first replica via storage.download_file as a
FileResponse. Replaces user_storage.get_lazy_download_url/get_download_url.
- output_views/delete_file/create_experiment use the storage facade
(file_exists/download_file/delete_file) on the first replica path.
- view_utils.DataProductSharedDirPermission resolves the path via
research.get_data_product instead of
user_storage.get_data_product_metadata.
- grpc_adapters.data_product_file_path: first replica filePath, ~/-prefixed
when relative (the storage facade throws a NullPointerException on a bare
relative path).
Upload/write/list views (D4.2/D4.3) intentionally left on user_storage.
---
.../django_airavata/apps/api/grpc_adapters.py | 70 +++++++++++++++++++++
.../django_airavata/apps/api/output_views.py | 19 +++---
.../django_airavata/apps/api/serializers.py | 71 +++++++++++++---------
.../django_airavata/apps/api/urls.py | 1 +
.../django_airavata/apps/api/view_utils.py | 23 +++----
.../django_airavata/apps/api/views.py | 64 ++++++++++++++-----
.../django_airavata/apps/workspace/views.py | 12 ++--
7 files changed, 191 insertions(+), 69 deletions(-)
diff --git a/airavata-django-portal/django_airavata/apps/api/grpc_adapters.py b/airavata-django-portal/django_airavata/apps/api/grpc_adapters.py
index b2e24d634..29ebf862a 100644
--- a/airavata-django-portal/django_airavata/apps/api/grpc_adapters.py
+++ b/airavata-django-portal/django_airavata/apps/api/grpc_adapters.py
@@ -25,6 +25,11 @@ from airavata.model.appcatalog.parallelism.ttypes import (
from airavata.model.appcatalog.parser.ttypes import IOType as _ThriftIOType
from airavata.model.application.io.ttypes import DataType as _ThriftDataType
from airavata.model.credential.store.ttypes import SummaryType as
_ThriftSummaryType
+from airavata.model.data.replica.ttypes import (
+ DataProductType as _ThriftDataProductType,
+ ReplicaLocationCategory as _ThriftReplicaLocationCategory,
+ ReplicaPersistentType as _ThriftReplicaPersistentType,
+)
from airavata.model.data.movement.ttypes import (
DataMovementProtocol as _ThriftDataMovementProtocol,
)
@@ -845,6 +850,71 @@ def group(pb):
)
+def _data_replica_location(pb):
+ """gRPC ``DataReplicaLocationModel`` -> serializer shape."""
+ return SimpleNamespace(
+ replicaId=pb.replica_id or None,
+ productUri=pb.product_uri or None,
+ replicaName=pb.replica_name or None,
+ replicaDescription=pb.replica_description or None,
+ creationTime=pb.creation_time or None,
+ lastModifiedTime=pb.last_modified_time or None,
+ validUntilTime=pb.valid_until_time or None,
+ replicaLocationCategory=_thrift_enum_prefixed(
+ pb, 'replica_location_category', _ThriftReplicaLocationCategory,
+ 'REPLICA_LOCATION_CATEGORY_'),
+ replicaPersistentType=_thrift_enum_prefixed(
+ pb, 'replica_persistent_type', _ThriftReplicaPersistentType,
+ 'REPLICA_PERSISTENT_TYPE_'),
+ storageResourceId=pb.storage_resource_id or None,
+ filePath=pb.file_path or None,
+ replicaMetadata=dict(pb.replica_metadata),
+ )
+
+
+def data_product_file_path(data_product):
+ """First replica's ``filePath`` from an adapted data product, or None.
+
+ The gRPC ``storage`` facade expects the FULL FILE PATH, absolute or
+ ``~/``-prefixed (a bare relative path NPEs server-side, as ``resolvePath``
+ expands ``~/`` to the storage root). Replica file paths are typically
+ absolute (e.g. ``/storage/tmp/<file>``); a relative one is ``~/``-prefixed.
+ Pass an adapted ``DataProductModel`` (``grpc_adapters.data_product``).
+ """
+ replicas = getattr(data_product, 'replicaLocations', None) or []
+ if not replicas:
+ return None
+ file_path = getattr(replicas[0], 'filePath', None)
+ if not file_path:
+ return None
+ if not (file_path.startswith('/') or file_path.startswith('~/')):
+ file_path = '~/' + file_path
+ return file_path
+
+
+def data_product(pb):
+ """gRPC ``DataProductModel`` -> ``DataProductSerializer`` shape."""
+ return SimpleNamespace(
+ productUri=pb.product_uri,
+ gatewayId=pb.gateway_id,
+ parentProductUri=pb.parent_product_uri or None,
+ productName=pb.product_name or None,
+ productDescription=pb.product_description or None,
+ ownerName=pb.owner_name or None,
+ dataProductType=_thrift_enum_prefixed(
+ pb, 'data_product_type', _ThriftDataProductType,
+ 'DATA_PRODUCT_TYPE_'),
+ productSize=pb.product_size or None,
+ creationTime=pb.creation_time or None,
+ # the serializer declares both modifiedTime and lastModifiedTime
+ modifiedTime=pb.last_modified_time or None,
+ lastModifiedTime=pb.last_modified_time or None,
+ productMetadata=dict(pb.product_metadata),
+ replicaLocations=[
+ _data_replica_location(r) for r in pb.replica_locations],
+ )
+
+
def user_profile(pb):
"""gRPC ``UserProfile`` -> ``UserProfileSerializer`` shape.
diff --git a/airavata-django-portal/django_airavata/apps/api/output_views.py b/airavata-django-portal/django_airavata/apps/api/output_views.py
index 69040df5b..1d80b3023 100644
--- a/airavata-django-portal/django_airavata/apps/api/output_views.py
+++ b/airavata-django-portal/django_airavata/apps/api/output_views.py
@@ -1,5 +1,6 @@
import collections
import inspect
+import io
import json
import logging
import os
@@ -8,10 +9,11 @@ from functools import partial
import nbformat
import papermill as pm
from airavata.model.application.io.ttypes import DataType
-from airavata_django_portal_sdk import user_storage
from django.conf import settings
from nbconvert import HTMLExporter
+from . import grpc_adapters
+
logger = logging.getLogger(__name__)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -216,13 +218,16 @@ def _generate_data(request,
DataType.STDERR) and
experiment_output.value.startswith("airavata-dp")):
data_product_uris = experiment_output.value.split(",")
- data_products = map(lambda dpid:
-
request.airavata_client.getDataProduct(request.authz_token,
- dpid),
- data_product_uris)
+ data_products = map(
+ lambda dpid: grpc_adapters.data_product(
+ request.airavata.research.get_data_product(dpid)),
+ data_product_uris)
for data_product in data_products:
- if user_storage.exists(request, data_product):
- output_file = user_storage.open_file(request, data_product)
+ file_path = grpc_adapters.data_product_file_path(data_product)
+ if file_path and request.airavata.storage.file_exists(file_path):
+ resp = request.airavata.storage.download_file(file_path)
+ output_file = io.BytesIO(resp.content)
+ output_file.name = resp.name or os.path.basename(file_path)
output_files.append(output_file)
generate_data_func = output_view_provider.generate_data
diff --git a/airavata-django-portal/django_airavata/apps/api/serializers.py b/airavata-django-portal/django_airavata/apps/api/serializers.py
index c6a2e33c3..9d22aa40f 100644
--- a/airavata-django-portal/django_airavata/apps/api/serializers.py
+++ b/airavata-django-portal/django_airavata/apps/api/serializers.py
@@ -2,6 +2,7 @@ import copy
import datetime
import json
import logging
+import os
from pathlib import Path
from urllib.parse import quote
from airavata.model.application.io.ttypes import DataType
@@ -79,6 +80,10 @@ from . import models, thrift_utils, view_utils
log = logging.getLogger(__name__)
+# Directory under user storage where uploaded experiment input files are staged
+# (matches the legacy airavata_django_portal_sdk convention).
+TMP_INPUT_FILE_UPLOAD_DIR = "tmp"
+
def user_has_access(request, resource_id, permission="WRITE"):
"""gRPC sharing access check (Track D — replaces the Thrift userHasAccess).
@@ -630,43 +635,53 @@ class DataProductSerializer(
userHasWriteAccess = serializers.SerializerMethodField()
def get_downloadURL(self, data_product):
- """Getter for downloadURL field. Returns None if file is not
available."""
+ """Lazy portal URL to the byte-streaming download endpoint.
+
+ Returns None when the data product has no replica. Resolving the bytes
+ is deferred to the endpoint, so this getter makes no backend call.
+ """
request = self.context['request']
- if user_storage.exists(request, data_product):
- return user_storage.get_lazy_download_url(request, data_product)
- else:
+ if not getattr(data_product, 'replicaLocations', None):
return None
+ base = request.build_absolute_uri(
+ reverse('django_airavata_api:download-file'))
+ return base + '?data-product-uri=' + quote(data_product.productUri)
def get_isInputFileUpload(self, data_product):
- """Return True if this is an uploaded input file."""
- request = self.context['request']
- return user_storage.is_input_file(request, data_product)
+ """Return True if this is an uploaded input file.
+
+ Derived from the data product alone (no backend call): an uploaded
+ input file lives directly under the input-staging directory
+ (TMP_INPUT_FILE_UPLOAD_DIR == "tmp"), so the first replica's file path
+ has that directory as its immediate parent.
+ """
+ replicas = getattr(data_product, 'replicaLocations', None) or []
+ if not replicas or not replicas[0].filePath:
+ return False
+ parent = os.path.dirname(replicas[0].filePath)
+ return os.path.basename(parent) == TMP_INPUT_FILE_UPLOAD_DIR
def get_filesize(self, data_product):
- request = self.context['request']
- # For backwards compatibility with older user_storage, can be
eventually removed
- if hasattr(user_storage, 'get_data_product_metadata') and
user_storage.exists(request, data_product):
- metadata = user_storage.get_data_product_metadata(request,
data_product)
- return metadata['size']
- else:
- return 0
+ # productSize comes from the data product registry; no backend call.
+ return getattr(data_product, 'productSize', None) or 0
def get_userHasWriteAccess(self, data_product: DataProductModel):
+ """Whether the requesting user may write this data product.
+
+ Derived without a backend file-metadata call: the owner always has
+ write access; in a shared directory only gateway admins do; otherwise
+ (a user's own private storage) write is allowed.
+ """
request = self.context['request']
- if user_storage.exists(request, data_product):
- file_metadata = user_storage.get_data_product_metadata(request,
data_product=data_product)
- # In remote API mode, "userHasWriteAccess" is returned so we just
pass it through here
- if "userHasWriteAccess" in file_metadata:
- return file_metadata["userHasWriteAccess"]
- else:
- path = file_metadata["path"]
- shared_path = view_utils.is_shared_path(path)
- if shared_path:
- # Only admins can edit files/directories in a shared
directory
- return request.is_gateway_admin
- return True
- else:
- return False
+ owner = getattr(data_product, 'ownerName', None)
+ if owner and owner == request.user.username:
+ return True
+ replicas = getattr(data_product, 'replicaLocations', None) or []
+ if replicas and replicas[0].filePath:
+ if view_utils.is_shared_path(replicas[0].filePath):
+ # Only admins can edit files in a shared directory.
+ return request.is_gateway_admin
+ return True
# TODO move this into airavata_sdk?
diff --git a/airavata-django-portal/django_airavata/apps/api/urls.py b/airavata-django-portal/django_airavata/apps/api/urls.py
index 474ebca1c..5b864b578 100644
--- a/airavata-django-portal/django_airavata/apps/api/urls.py
+++ b/airavata-django-portal/django_airavata/apps/api/urls.py
@@ -53,6 +53,7 @@ urlpatterns = [
re_path(r'^upload$', views.upload_input_file, name='upload_input_file'),
re_path(r'^tus-upload-finish$', views.tus_upload_finish,
name='tus_upload_finish'),
+ re_path(r'^download-file$', views.download, name='download-file'),
re_path(r'^download', views.download_file, name='download_file'),
re_path(r'^delete-file$', views.delete_file, name='delete_file'),
re_path(r'^data-products', views.DataProductView.as_view(),
diff --git a/airavata-django-portal/django_airavata/apps/api/view_utils.py b/airavata-django-portal/django_airavata/apps/api/view_utils.py
index 6b96662c0..e77c5db75 100644
--- a/airavata-django-portal/django_airavata/apps/api/view_utils.py
+++ b/airavata-django-portal/django_airavata/apps/api/view_utils.py
@@ -5,7 +5,6 @@ from datetime import datetime
from pathlib import Path
import pytz
-from airavata_django_portal_sdk import user_storage
from django.conf import settings
from django.http import Http404
from django.http.request import QueryDict
@@ -15,6 +14,8 @@ from rest_framework.reverse import reverse
from rest_framework.utils.urls import remove_query_param, replace_query_param
from rest_framework.viewsets import GenericViewSet
+from . import grpc_adapters
+
logger = logging.getLogger(__name__)
@@ -272,20 +273,12 @@ class BaseSharedDirPermission(permissions.BasePermission):
class DataProductSharedDirPermission(BaseSharedDirPermission):
def get_path(self, request, view) -> str:
- data_product_uri = request.query_params.get('data-product-uri',
request.query_params.get('product-uri', ''))
- file_metadata = user_storage.get_data_product_metadata(request,
data_product_uri=data_product_uri)
- return file_metadata["path"]
-
- def has_permission(self, request, view):
- # Special handling for remote API, just get the userHasWriteAccess
attribute and use that
- if hasattr(settings, 'GATEWAY_DATA_STORE_REMOTE_API'):
- if request.method in permissions.SAFE_METHODS:
- return True
- data_product_uri = request.query_params.get('data-product-uri',
request.query_params.get('product-uri', ''))
- file_metadata = user_storage.get_data_product_metadata(request,
data_product_uri=data_product_uri)
- return file_metadata["userHasWriteAccess"]
- else:
- return super().has_permission(request, view)
+ data_product_uri = request.query_params.get(
+ 'data-product-uri', request.query_params.get('product-uri', ''))
+ data_product = grpc_adapters.data_product(
+ request.airavata.research.get_data_product(data_product_uri))
+ file_path = grpc_adapters.data_product_file_path(data_product)
+ return file_path or ""
class UserStorageSharedDirPermission(BaseSharedDirPermission):
diff --git a/airavata-django-portal/django_airavata/apps/api/views.py b/airavata-django-portal/django_airavata/apps/api/views.py
index 0d5a1aa23..d17bf30a8 100644
--- a/airavata-django-portal/django_airavata/apps/api/views.py
+++ b/airavata-django-portal/django_airavata/apps/api/views.py
@@ -1,4 +1,5 @@
import base64
+import io
import json
import logging
import os
@@ -38,7 +39,7 @@ from airavata_django_portal_sdk import (
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
-from django.http import Http404, HttpResponse, JsonResponse
+from django.http import FileResponse, Http404, HttpResponse, JsonResponse
from django.shortcuts import redirect
from django.urls import reverse
from django.views.decorators.gzip import gzip_page
@@ -349,8 +350,8 @@ class FullExperimentViewSet(mixins.RetrieveModelMixin,
experimentModel = self.request.airavata_client.getExperiment(
self.authz_token, lookup_value)
outputDataProducts = [
- self.request.airavata_client.getDataProduct(self.authz_token,
- output.value)
+ grpc_adapters.data_product(
+ self.request.airavata.research.get_data_product(output.value))
for output in experimentModel.experimentOutputs
if (output.value and
output.value.startswith('airavata-dp') and
@@ -358,7 +359,8 @@ class FullExperimentViewSet(mixins.RetrieveModelMixin,
DataType.STDOUT,
DataType.STDERR))]
outputDataProducts += [
- self.request.airavata_client.getDataProduct(self.authz_token, dp)
+ grpc_adapters.data_product(
+ self.request.airavata.research.get_data_product(dp))
for output in experimentModel.experimentOutputs
if (output.value and
output.type == DataType.URI_COLLECTION)
@@ -374,8 +376,8 @@ class FullExperimentViewSet(mixins.RetrieveModelMixin,
exp_output_views = output_views.get_output_views(
self.request, experimentModel, applicationInterface)
inputDataProducts = [
- self.request.airavata_client.getDataProduct(self.authz_token,
- inp.value)
+ grpc_adapters.data_product(
+ self.request.airavata.research.get_data_product(inp.value))
for inp in experimentModel.experimentInputs
if (inp.value and
inp.value.startswith('airavata-dp') and
@@ -383,7 +385,8 @@ class FullExperimentViewSet(mixins.RetrieveModelMixin,
DataType.STDOUT,
DataType.STDERR))]
inputDataProducts += [
- self.request.airavata_client.getDataProduct(self.authz_token, dp)
+ grpc_adapters.data_product(
+ self.request.airavata.research.get_data_product(dp))
for inp in experimentModel.experimentInputs
if (inp.value and
inp.type == DataType.URI_COLLECTION)
@@ -847,16 +850,16 @@ class DataProductView(APIView):
def get(self, request, format=None):
data_product_uri = request.query_params['product-uri']
- data_product = request.airavata_client.getDataProduct(
- request.authz_token, data_product_uri)
+ data_product = grpc_adapters.data_product(
+ request.airavata.research.get_data_product(data_product_uri))
serializer = self.serializer_class(
data_product, context={'request': request})
return Response(serializer.data)
def put(self, request, format=None):
data_product_uri = request.query_params['product-uri']
- data_product = request.airavata_client.getDataProduct(
- request.authz_token, data_product_uri)
+ data_product = grpc_adapters.data_product(
+ request.airavata.research.get_data_product(data_product_uri))
if request.data and "fileContentText" in request.data:
user_storage.update_data_product_content(
request=request,
@@ -912,6 +915,36 @@ def download_file(request):
return redirect(user_storage.get_download_url(request,
data_product_uri=data_product_uri))
+@api_view()
+def download(request):
+ """Stream the bytes of a data product's first replica.
+
+ Resolves ``?data-product-uri=`` via the gRPC research registry and streams
+ the file from the gRPC storage facade. Replaces the legacy SDK
+ download-URL/redirect path. The DataProductSerializer's ``downloadURL``
+ field points here.
+ """
+ data_product_uri = request.GET.get('data-product-uri', '')
+ try:
+ data_product = grpc_adapters.data_product(
+ request.airavata.research.get_data_product(data_product_uri))
+ except Exception as e:
+ log.warning("Failed to load DataProduct for {}".format(
+ data_product_uri), exc_info=True, extra={'request': request})
+ raise Http404("data product does not exist") from e
+ file_path = grpc_adapters.data_product_file_path(data_product)
+ if file_path is None:
+ raise Http404("data product has no replica to download")
+ resp = request.airavata.storage.download_file(file_path)
+ file_name = resp.name or data_product.productName or
os.path.basename(file_path)
+ response = FileResponse(
+ io.BytesIO(resp.content),
+ as_attachment=False,
+ filename=file_name,
+ content_type=resp.content_type or 'application/octet-stream')
+ return response
+
+
@api_view(http_method_names=['DELETE'])
@permission_classes([IsAuthenticated, DataProductSharedDirPermission])
def delete_file(request):
@@ -919,8 +952,8 @@ def delete_file(request):
data_product_uri = request.GET.get('data-product-uri', '')
data_product = None
try:
- data_product = request.airavata_client.getDataProduct(
- request.authz_token, data_product_uri)
+ data_product = grpc_adapters.data_product(
+ request.airavata.research.get_data_product(data_product_uri))
except Exception as e:
log.warning("Failed to load DataProduct for {}"
.format(data_product_uri), exc_info=True)
@@ -929,7 +962,10 @@ def delete_file(request):
if (data_product.gatewayId != settings.GATEWAY_ID or
data_product.ownerName != request.user.username):
raise PermissionDenied()
- user_storage.delete(request, data_product)
+ file_path = grpc_adapters.data_product_file_path(data_product)
+ if file_path is None:
+ raise Http404("data product has no replica to delete")
+ request.airavata.storage.delete_file(file_path)
return HttpResponse(status=204)
except ObjectDoesNotExist as e:
raise Http404(str(e)) from e
diff --git a/airavata-django-portal/django_airavata/apps/workspace/views.py b/airavata-django-portal/django_airavata/apps/workspace/views.py
index 0c85df717..54d7d58fd 100644
--- a/airavata-django-portal/django_airavata/apps/workspace/views.py
+++ b/airavata-django-portal/django_airavata/apps/workspace/views.py
@@ -4,13 +4,12 @@ import logging
from urllib.parse import urlparse
from airavata.model.application.io.ttypes import DataType
-from airavata_django_portal_sdk import user_storage as user_storage_sdk
from django.contrib.auth.decorators import login_required
from django.shortcuts import render
from django.utils.module_loading import import_string
from rest_framework.renderers import JSONRenderer
-from django_airavata.apps.api import models
+from django_airavata.apps.api import grpc_adapters, models
from django_airavata.apps.api.views import (
ApplicationModuleViewSet,
ExperimentSearchViewSet,
@@ -94,9 +93,12 @@ def create_experiment(request, app_module_id):
if user_file_url.scheme == 'airavata-dp':
dp_uri = user_file_value
try:
- data_product = request.airavata_client.getDataProduct(
- request.authz_token, dp_uri)
- if user_storage_sdk.exists(request, data_product):
+ data_product = grpc_adapters.data_product(
+ request.airavata.research.get_data_product(dp_uri))
+ file_path = grpc_adapters.data_product_file_path(
+ data_product)
+ if file_path and request.airavata.storage.file_exists(
+ file_path):
user_input_values[app_input['name']] = dp_uri
except Exception:
logger.exception(