seongjinyoon commented on code in PR #3772:
URL: https://github.com/apache/texera/pull/3772#discussion_r2395942816


##########
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala:
##########
@@ -102,6 +103,179 @@ object WorkflowExecutionsResource {
     }
   }
 
+  /**
+    * Represents a dataset that has access restrictions for export.
+    * Used to track which datasets are non-downloadable and owned by other 
users.
+    *
+    * @param ownerEmail The email of the dataset owner
+    * @param datasetName The name of the dataset
+    */
+  private case class RestrictedDataset(ownerEmail: String, datasetName: 
String) {
+    def cacheKey: (String, String) = (ownerEmail.toLowerCase, 
datasetName.toLowerCase)
+    def label: String = s"$datasetName ($ownerEmail)"
+  }
+
+  /**
+    * Parses a file path to extract dataset information.
+    * Expected format: /ownerEmail/datasetName/...
+    *
+    * @param path The file path from operator properties
+    * @return Some(RestrictedDataset) if path is valid, None otherwise
+    */
+  private def parseDatasetPath(path: String): Option[RestrictedDataset] = {
+    if (path == null) {
+      return None
+    }
+    val trimmed = path.trim
+    if (!trimmed.startsWith("/")) {
+      return None
+    }
+    val segments = trimmed.split("/").filter(_.nonEmpty)
+    if (segments.length < 4) {
+      return None
+    }
+    val ownerEmail = segments(0)
+    val datasetName = segments(1)
+    Some(RestrictedDataset(ownerEmail, datasetName))
+  }
+
+  /**
+    * Checks if a dataset is downloadable by querying the database.
+    * Uses caching to avoid repeated database queries for the same dataset.
+    *
+    * @param dataset The dataset to check
+    * @param cache A cache to store lookup results
+    * @return Some(true) if downloadable, Some(false) if not, None if dataset 
doesn't exist
+    */
+  private def lookupDatasetDownloadable(
+      dataset: RestrictedDataset,
+      cache: mutable.Map[(String, String), Option[Boolean]]
+  ): Option[Boolean] = {
+    cache.getOrElseUpdate(

Review Comment:
   The cache helps if multiple operators use the same dataset; however, your 
proposal is better, as it makes only a single query call.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to