This is an automated email from the ASF dual-hosted git repository. Xiao-zhen-Liu pushed a commit to branch xiaozhen-caching-prototype in repository https://gitbox.apache.org/repos/asf/texera.git
commit 1449763c8a0b81f74788031f5f65559cbe17e607 Merge: 37099c2a5e 830e3090b1 Author: Xiaozhen Liu <[email protected]> AuthorDate: Fri Feb 6 16:51:19 2026 -0800 Merge branch 'refs/heads/main' into xiaozhen-caching-prototype # Conflicts: # common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala # frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts .asf.yaml | 2 +- .github/workflows/build-and-push-images.yml | 13 + .licenserc.yaml | 14 + LICENSE | 38 + .../core/architecture/managers/executor_manager.py | 26 +- .../architecture/managers/test_executor_manager.py | 92 +- .../scheduling/CostBasedScheduleGenerator.scala | 36 +- .../apache/texera/web/resource/GmailResource.scala | 9 +- .../texera/web/resource/UserConfigResource.scala | 13 +- .../texera/web/resource/auth/AuthResource.scala | 13 +- .../web/resource/auth/GoogleAuthResource.scala | 13 +- .../resource/dashboard/SearchQueryBuilder.scala | 7 +- .../resource/dashboard/UnifiedResourceSchema.scala | 7 +- .../admin/execution/AdminExecutionResource.scala | 7 +- .../admin/settings/AdminSettingsResource.scala | 2 +- .../dashboard/admin/user/AdminUserResource.scala | 9 +- .../web/resource/dashboard/hub/HubResource.scala | 14 +- .../web/resource/dashboard/user/UserResource.scala | 4 +- .../dataset/utils/DatasetStatisticsUtils.scala | 8 +- .../user/project/ProjectAccessResource.scala | 20 +- .../dashboard/user/project/ProjectResource.scala | 13 +- .../user/project/PublicProjectResource.scala | 11 +- .../dashboard/user/quota/UserQuotaResource.scala | 7 +- .../user/workflow/WorkflowAccessResource.scala | 16 +- .../user/workflow/WorkflowExecutionsResource.scala | 9 +- .../dashboard/user/workflow/WorkflowResource.scala | 36 +- .../user/workflow/WorkflowVersionResource.scala | 11 +- .../service/ExecutionsMetadataPersistService.scala | 14 +- .../apache/texera/workflow/WorkflowCompiler.scala | 5 +- .../CostBasedScheduleGeneratorSpec.scala | 311 +++++- .../amber/engine/e2e/DataProcessingSpec.scala | 145 ++- bin/computing-unit-master.dockerfile | 53 +- bin/computing-unit-worker.dockerfile | 58 +- bin/k8s/templates/external-names.yaml | 8 + ...workflow-computing-unit-manager-deployment.yaml | 13 + .../org/apache/texera/auth/JwtAuthFilter.scala | 2 +- .../texera/auth/util/ComputingUnitAccess.scala | 7 +- common/config/src/main/resources/gui.conf | 5 +- .../scala/org/apache/texera/config/GuiConfig.scala | 2 + .../scala/org/apache/texera/dao/SqlServer.scala | 14 +- .../texera/amber/core/workflow/ExecutionMode.java | 14 +- .../amber/core/workflow/WorkflowContext.scala | 4 +- .../amber/core/workflow/WorkflowSettings.scala | 6 +- .../texera/service/util/S3StorageClient.scala | 1 + .../jackson/jsonSchema/JsonSchemaDraft.java | 19 +- .../jackson/jsonSchema/JsonSchemaGenerator.scala | 19 +- .../JsonSchemaArrayWithUniqueItems.java | 20 +- .../jsonSchema/annotations/JsonSchemaBool.java | 20 +- .../jsonSchema/annotations/JsonSchemaDefault.java | 20 +- .../annotations/JsonSchemaDescription.java | 20 +- .../jsonSchema/annotations/JsonSchemaExamples.java | 20 +- .../jsonSchema/annotations/JsonSchemaFormat.java | 20 +- .../jsonSchema/annotations/JsonSchemaInject.java | 20 +- .../jsonSchema/annotations/JsonSchemaInt.java | 20 +- .../jsonSchema/annotations/JsonSchemaOptions.java | 20 +- .../jsonSchema/annotations/JsonSchemaString.java | 20 +- .../jsonSchema/annotations/JsonSchemaTitle.java | 20 +- .../sklearnAdvanced/base/HyperParameters.java | 58 -- .../sklearnAdvanced/base/HyperParameters.scala | 68 ++ ...ortCriteriaUnit.java => SortCriteriaUnit.scala} | 23 +- .../texera/amber/operator/udf/r/RUDFOpDesc.scala | 4 +- .../resource/ComputingUnitAccessResource.scala | 7 +- .../resource/ComputingUnitManagingResource.scala | 12 +- .../texera/service/resource/ConfigResource.scala | 1 + .../service/resource/DatasetAccessResource.scala | 7 +- .../texera/service/resource/DatasetResource.scala | 535 +++++++--- .../service/util/LakeFSExceptionHandler.scala | 79 ++ .../service/resource/DatasetResourceSpec.scala | 1035 +++++++++++++++++++- frontend/src/app/app.module.ts | 4 + frontend/src/app/common/formly/array.type.ts | 23 +- frontend/src/app/common/formly/multischema.type.ts | 20 +- frontend/src/app/common/formly/null.type.ts | 20 +- frontend/src/app/common/formly/object.type.ts | 20 +- .../app/common/service/gui-config.service.mock.ts | 2 + frontend/src/app/common/type/gui-config.ts | 2 + frontend/src/app/common/type/workflow.ts | 6 + .../component/admin/user/admin-user.component.html | 6 +- .../component/admin/user/admin-user.component.ts | 5 + .../component/user-dashboard-test-fixtures.ts | 4 +- .../conflicting-file-modal-content.component.html} | 35 +- .../conflicting-file-modal-content.component.scss | 11 +- .../conflicting-file-modal-content.component.ts} | 20 +- .../files-uploader/files-uploader.component.ts | 214 +++- .../user/share-access/share-access.component.html | 19 +- .../user/share-access/share-access.component.ts | 97 +- .../dataset-detail.component.ts | 31 +- .../user/user-workflow/user-workflow.component.ts | 7 +- .../service/user/dataset/dataset.service.ts | 92 +- .../app/dashboard/type/dashboard-file.interface.ts | 1 + .../landing-page/landing-page.component.html | 5 - .../left-panel/settings/settings.component.html | 17 +- .../left-panel/settings/settings.component.scss | 4 - .../left-panel/settings/settings.component.ts | 64 +- .../app/workspace/component/menu/menu.component.ts | 12 +- .../workspace/service/joint-ui/joint-ui.service.ts | 6 +- .../model/workflow-action.service.ts | 7 +- frontend/src/assets/svg/hub_icon.svg | 576 ----------- .../assets/svg/operator-reuse-cache-invalid.svg | 26 +- .../src/assets/svg/operator-reuse-cache-valid.svg | 28 +- frontend/src/assets/svg/operator-view-result.svg | 20 +- licenses/LICENSE-MIT.txt | 19 + project/plugins.sbt | 3 + .../src/language-server-runner.ts | 23 +- pyright-language-service/src/main.ts | 23 +- pyright-language-service/src/server-commons.ts | 24 +- .../src/types/hocon-parser.d.ts | 2 +- sql/texera_ddl.sql | 29 +- sql/updates/19.sql | 60 ++ sql/updates/20.sql | 37 + 109 files changed, 3266 insertions(+), 1557 deletions(-) diff --cc amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala index b8bf13ec67,72fb1c364e..00a50231cf --- a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala @@@ -63,13 -55,13 +63,14 @@@ import javax.ws.rs. import javax.ws.rs.core.{MediaType, Response} import scala.collection.mutable import scala.jdk.CollectionConverters._ +import scala.jdk.OptionConverters._ object WorkflowExecutionsResource { - final private lazy val context = SqlServer - .getInstance() - .createDSLContext() - final private lazy val executionsDao = new WorkflowExecutionsDao(context.configuration) + private def context: DSLContext = + SqlServer + .getInstance() + .createDSLContext() + private def executionsDao = new WorkflowExecutionsDao(context.configuration) private def getExecutionById(eId: Integer): WorkflowExecutions = { executionsDao.fetchOneByEid(eId) diff --cc common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala index d9799f0104,c4a86d3538..81eb20df90 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala @@@ -19,20 -19,11 +19,24 @@@ package org.apache.texera.amber.core.workflow + import org.apache.texera.config.GuiConfig + +import org.apache.texera.amber.core.virtualidentity.ExecutionIdentity + +import java.net.URI + +case class CachedOutput( + resultUri: URI, + fingerprintJson: String, + tupleCount: Option[Long], + sourceExecutionId: Option[ExecutionIdentity] +) + case class WorkflowSettings( - dataTransferBatchSize: Int, + dataTransferBatchSize: Int = 400, + executionMode: ExecutionMode = + ExecutionMode.valueOf(GuiConfig.guiWorkflowWorkspaceDefaultExecutionMode), - outputPortsNeedingStorage: Set[GlobalPortIdentity] = Set.empty + outputPortsNeedingStorage: Set[GlobalPortIdentity] = Set.empty, + // serialized GlobalPortIdentity -> cached output + cachedOutputs: Map[String, CachedOutput] = Map.empty ) diff --cc frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts index d93110ebf2,77458947cd..eda67e5973 --- a/frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts +++ b/frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts @@@ -366,9 -339,7 +366,9 @@@ export class JointUIService originalName = portId; } - const labelText = count.toLocaleString(); + // Negative counts mark skipped/unknown inputs from cached sub-operators. + const isUnknownCount = count !== undefined && count < 0; - const labelText = isSkippedFromCache || isUnknownCount ? "-" : String(count ?? 0); ++ const labelText = isSkippedFromCache || isUnknownCount ? "-" : (count ?? 0).toLocaleString(); element.portProp(portId, "attrs/.port-label/text", labelText); } }); @@@ -388,67 -359,13 +388,67 @@@ originalName = portId; } - const baseLabel = isSkippedFromCache && count === undefined ? "-" : String(count ?? 0); - const labelText = count.toLocaleString(); - - element.portProp(portId, "attrs/.port-label/text", labelText); ++ const baseLabel = isSkippedFromCache && count === undefined ? "-" : (count ?? 0).toLocaleString(); + element.portProp(portId, "attrs/.port-label/text", baseLabel); } }); + const effectiveCacheLabels = isSkippedFromCache ? cachePortLabels : undefined; + this.changeOperatorCacheLabels(jointPaper, operatorID, effectiveCacheLabels); this.changeOperatorState(jointPaper, operatorID, statistics.operatorState); } + + /** + * Updates cache usage labels for output ports without changing counts or operator state. + */ + public changeOperatorCacheLabels( + jointPaper: joint.dia.Paper, + operatorID: string, + cachePortLabels?: Record<string, string> + ): void { + const element = jointPaper.getModelById(operatorID) as joint.shapes.devs.Model; + if (!element) { + return; + } + const outPorts = element.getPorts().filter(p => p.group === "out"); + outPorts.forEach(portDef => { + const portId = portDef.id; + if (portId != null) { + const parts = portId.split("-"); + const numericSuffix = parts.length > 1 ? parts[1] : portId; + const cacheLabel = cachePortLabels?.[numericSuffix] ?? ""; + element.portProp(portId, "attrs/.port-cache-label/text", cacheLabel); + element.portProp( + portId, + "attrs/.port-cache-label/transform", + cacheLabel ? "translate(0, 12)" : "" + ); + } + }); + } + + /** + * Updates cached output port indicator badges without changing counts or labels. + */ + public changeOperatorCachedPorts( + jointPaper: joint.dia.Paper, + operatorID: string, + cachedPortIds?: Set<string> + ): void { + const element = jointPaper.getModelById(operatorID) as joint.shapes.devs.Model; + if (!element) { + return; + } + const outPorts = element.getPorts().filter(p => p.group === "out"); + outPorts.forEach(portDef => { + const portId = portDef.id; + if (portId != null) { + const parts = portId.split("-"); + const numericSuffix = parts.length > 1 ? parts[1] : portId; + const isCached = cachedPortIds?.has(numericSuffix) ?? false; + element.portProp(portId, "attrs/.port-cache-indicator/display", isCached ? "block" : "none"); + } + }); + } public foldOperatorDetails(jointPaper: joint.dia.Paper, operatorID: string): void { jointPaper.getModelById(operatorID).attr({ [`.${operatorStateClass}`]: { visibility: "hidden" },
