This is an automated email from the ASF dual-hosted git repository. Xiao-zhen-Liu pushed a commit to branch xiaozhen-caching-prototype in repository https://gitbox.apache.org/repos/asf/texera.git
commit 819711855cc857355a9f975e0fbb6b25e3d4fa1a Merge: 79623cd27a eb287f3fc2 Author: Xiaozhen Liu <[email protected]> AuthorDate: Mon Jun 1 15:14:35 2026 -0700 Merge branch 'refs/heads/main' into xiaozhen-caching-prototype # Conflicts: # amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/promisehandlers/WorkerExecutionCompletedHandler.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/config/PortConfig.scala # frontend/src/app/workspace/component/menu/menu.component.ts # frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.html # frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.ts # frontend/src/app/workspace/component/workflow-editor/workflow-editor.component.ts # frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts .asf.yaml | 113 +- .github/ISSUE_TEMPLATE/bug-template.yaml | 11 +- .github/ISSUE_TEMPLATE/feature-template.yaml | 15 - .github/ISSUE_TEMPLATE/task-template.yaml | 20 +- .github/labeler.yml | 69 +- .github/release/vote-email-template.md | 2 - .github/scripts/compose-backport-message.py | 56 + .github/scripts/prepare-backport-checkout.sh | 50 + .../automatic-email-notif-on-ddl-change.yml | 2 +- .github/workflows/build-and-push-images.yml | 45 +- .github/workflows/build.yml | 850 + .github/workflows/check-header.yml | 3 +- .github/workflows/comment-commands.yml | 363 + .github/workflows/create-release-candidate.yml | 118 +- .github/workflows/direct-backport-push.yml | 619 + .github/workflows/github-action-build.yml | 179 - .github/workflows/issue-triage.yml | 89 - .github/workflows/license-binary-checker.yml | 176 + .github/workflows/lint-pr.yml | 7 +- .github/workflows/pr-assignment.yml | 258 + .github/workflows/required-checks.yml | 294 + .../workflows/welcome-first-time-contributor.yml | 157 + .gitignore | 16 + .github/workflows/check-header.yml => .jvmopts | 27 +- .licenserc.yaml | 11 +- .run/ComputingUnitMaster.run.xml | 1 + .run/ComputingUnitWorker.run.xml | 1 + AGENTS.md | 197 + CLAUDE.md | 3 + CONTRIBUTING.md | 41 + DISCLAIMER | 8 + DISCLAIMER-WIP | 23 - Dockerfile | 60 - LICENSE | 9 +- NOTICE | 2 +- README.md | 150 +- LICENSE => access-control-service/LICENSE-binary | 251 +- access-control-service/NOTICE-binary | 1546 ++ access-control-service/build.sbt | 10 + .../texera/service/AccessControlService.scala | 7 + .../activity/UserActivityEventListener.scala | 79 + .../service/resource/AccessControlResource.scala | 28 +- .../apache/texera/AccessControlResourceSpec.scala | 58 + .../service/AccessControlServiceRunSpec.scala | 49 + .../activity/UserActivityEventListenerSpec.scala | 136 + agent-service/.dockerignore | 2 + agent-service/.env.example | 21 + agent-service/.prettierrc | 13 + LICENSE => agent-service/LICENSE-binary | 141 +- agent-service/bin/collect-licenses.ts | 87 + agent-service/bun.lock | 237 + agent-service/package.json | 37 + .../src/agent/index.ts | 2 + agent-service/src/agent/prompts.ts | 296 + agent-service/src/agent/texera-agent.ts | 840 + .../src/agent/tools/index.ts | 3 + .../src/agent/tools/result-formatting.test.ts | 343 + agent-service/src/agent/tools/result-formatting.ts | 135 + .../src/agent/tools/tools-utility.test.ts | 177 + agent-service/src/agent/tools/tools-utility.ts | 76 + .../src/agent/tools/workflow-crud-tools.ts | 346 + .../src/agent/tools/workflow-execution-tools.ts | 605 + agent-service/src/agent/util/auto-layout.test.ts | 163 + agent-service/src/agent/util/auto-layout.ts | 70 + agent-service/src/agent/util/context-utils.ts | 288 + .../src/agent/util/workflow-system-metadata.ts | 268 + agent-service/src/agent/util/workflow-utils.ts | 209 + .../src/agent/workflow-result-state.test.ts | 95 + agent-service/src/agent/workflow-result-state.ts | 83 + agent-service/src/agent/workflow-state.test.ts | 176 + agent-service/src/agent/workflow-state.ts | 488 + agent-service/src/api/auth-api.ts | 65 + agent-service/src/api/backend-api.ts | 88 + agent-service/src/api/compile-api.ts | 74 + .../src/api/execution-api.ts | 24 +- .../src/api/index.ts | 11 +- agent-service/src/api/workflow-api.ts | 97 + agent-service/src/config/env.ts | 39 + .../src/index.ts | 14 +- .../src/test.ts => agent-service/src/logger.ts | 30 +- agent-service/src/server.test.ts | 223 + agent-service/src/server.ts | 664 + agent-service/src/types/agent.ts | 165 + .../src/types/execution.ts | 35 +- .../src/types/index.ts | 3 + agent-service/src/types/workflow.ts | 144 + agent-service/tsconfig.json | 23 + amber/LICENSE-binary-java | 714 + LICENSE => amber/LICENSE-binary-python | 231 +- amber/NOTICE-binary | 1694 ++ amber/build.sbt | 38 +- ...rator-requirements.txt => dev-requirements.txt} | 23 +- amber/operator-requirements.txt | 5 +- amber/{src/main/python => }/pyproject.toml | 20 +- amber/requirements.txt | 6 +- .../engine/architecture/rpc/controlcommands.proto | 29 +- .../architecture/rpc/controllerservice.proto | 4 +- .../engine/architecture/rpc/workerservice.proto | 1 + .../handlers/control/assign_port_handler.py | 6 +- .../handlers/control/debug_command_handler.py | 45 +- .../handlers/control/update_executor_handler.py | 27 +- .../python/core/architecture/managers/context.py | 25 +- .../core/architecture/managers/executor_manager.py | 50 +- .../core/architecture/managers/pause_manager.py | 7 +- .../architecture/managers/statistics_manager.py | 29 +- .../core/architecture/packaging/output_manager.py | 63 +- .../rpc/async_rpc_handler_initializer.py | 4 + .../core/architecture/rpc/async_rpc_server.py | 21 +- amber/src/main/python/core/models/operator.py | 4 +- amber/src/main/python/core/models/state.py | 90 +- amber/src/main/python/core/proxy/proxy_server.py | 10 +- amber/src/main/python/core/python_worker.py | 4 +- .../main/python/core/runnables/data_processor.py | 127 +- amber/src/main/python/core/runnables/heartbeat.py | 20 +- amber/src/main/python/core/runnables/main_loop.py | 23 +- .../main/python/core/runnables/network_receiver.py | 11 +- .../main/python/core/runnables/network_sender.py | 13 +- .../main/python/core/storage/document_factory.py | 133 +- .../storage/iceberg/iceberg_catalog_instance.py | 4 - .../core/storage/iceberg/iceberg_document.py | 7 +- .../python/core/storage/iceberg/iceberg_utils.py | 17 +- .../input_port_materialization_reader_runnable.py | 23 +- .../src/main/python/core/storage/storage_config.py | 3 + .../main/python/core/storage/vfs_uri_factory.py | 17 +- .../main/python/core/util/{thread => }/atomic.py | 2 +- .../core/util/{protocol => }/base_protocols.py | 0 .../main/python/core/util/buffer/buffer_base.py | 2 +- .../linked_blocking_multi_queue.py | 2 +- .../core/util/customized_queue/queue_base.py | 2 +- .../__init__.py => expression_evaluator.py} | 0 .../src/main/python/core/util/operator/__init__.py | 16 - .../python/core/util/{runnable => }/runnable.py | 0 .../stoppable/stoppable_queue_blocking_thread.py | 4 +- amber/src/main/python/core/util/thread/__init__.py | 16 - .../__init__.py => virtual_identity.py} | 17 + amber/src/main/python/proto/__init__.py | 0 amber/src/main/python/proto/org/__init__.py | 0 amber/src/main/python/proto/org/apache/__init__.py | 0 .../python/proto/org/apache/texera/__init__.py | 0 .../proto/org/apache/texera/amber/__init__.py | 0 .../proto/org/apache/texera/amber/core/__init__.py | 146 - .../org/apache/texera/amber/engine/__init__.py | 0 .../texera/amber/engine/architecture/__init__.py | 0 .../amber/engine/architecture/rpc/__init__.py | 2100 --- .../engine/architecture/sendsemantics/__init__.py | 66 - .../amber/engine/architecture/worker/__init__.py | 49 - .../apache/texera/amber/engine/common/__init__.py | 156 - .../python/proto/org/apache/texera/web/__init__.py | 158 - amber/src/main/python/proto/scalapb/__init__.py | 421 - .../pytexera/storage/dataset_file_document.py | 16 +- .../pytexera/storage/large_binary_input_stream.py | 24 +- .../pytexera/storage/large_binary_output_stream.py | 51 +- amber/src/main/python/texera_run_python_worker.py | 2 + .../architecture/common/AmberProcessor.scala | 5 +- .../architecture/common/ExecutorDeployment.scala | 10 +- ...ice.scala => PekkoActorRefMappingService.scala} | 2 +- ...aActorService.scala => PekkoActorService.scala} | 2 +- ...ice.scala => PekkoMessageTransferService.scala} | 6 +- .../engine/architecture/common/WorkflowActor.scala | 10 +- .../architecture/controller/Controller.scala | 1 + .../ControllerAsyncRPCHandlerInitializer.scala | 4 +- .../controller/ControllerProcessor.scala | 21 +- .../controller/ControllerTimerService.scala | 6 +- .../controller/WorkflowScheduler.scala | 2 + .../controller/execution/OperatorExecution.scala | 2 +- .../controller/execution/RegionExecution.scala | 4 +- .../controller/execution/WorkflowExecution.scala | 18 +- .../JumpToOperatorRegionHandler.scala} | 41 +- .../promisehandlers/ReconfigurationHandler.scala | 141 + .../deploysemantics/deploystrategy/OneOnEach.scala | 3 +- .../deploystrategy/RandomDeployment.scala | 3 + .../deploystrategy/RoundRobinDeployment.scala | 3 + .../messaginglayer/AmberFIFOChannel.scala | 6 +- .../messaginglayer/NetworkInputGateway.scala | 6 + .../messaginglayer/NetworkOutputGateway.scala | 6 + .../messaginglayer/OutputManager.scala | 69 +- .../messaginglayer/WorkerTimerService.scala | 4 +- .../pythonworker/PythonProxyServer.scala | 2 +- .../pythonworker/PythonWorkflowWorker.scala | 24 +- .../scheduling/CostBasedScheduleGenerator.scala | 6 +- .../ExpansionGreedyScheduleGenerator.scala | 4 +- .../scheduling/RegionExecutionCoordinator.scala | 97 +- .../engine/architecture/scheduling/Schedule.scala | 12 +- .../scheduling/ScheduleGenerator.scala | 36 +- .../architecture/scheduling/SchedulingUtils.scala | 2 +- .../scheduling/WorkflowExecutionCoordinator.scala | 44 +- .../scheduling/config/ChannelConfig.scala | 5 + .../scheduling/config/LinkConfig.scala | 7 +- .../scheduling/config/PortConfig.scala | 6 +- .../scheduling/config/WorkerConfig.scala | 13 +- .../resourcePolicies/ResourceAllocator.scala | 5 +- .../engine/architecture/worker/DataProcessor.scala | 11 +- .../DataProcessorRPCHandlerInitializer.scala | 28 +- .../engine/architecture/worker/PauseType.scala | 2 - .../InputPortMaterializationReaderThread.scala | 34 +- ...d.scala => OutputPortStorageWriterThread.scala} | 40 +- .../worker/managers/SerializationManager.scala | 8 +- .../worker/managers/StatisticsManager.scala | 17 +- .../worker/promisehandlers/AssignPortHandler.scala | 4 +- .../worker/promisehandlers/EndHandler.scala | 2 +- .../InitializeExecutorHandler.scala | 26 +- ...ndHandler.scala => UpdateExecutorHandler.scala} | 37 +- .../texera/amber/engine/common/AmberRuntime.scala | 10 +- .../common}/FriesReconfigurationAlgorithm.scala | 55 +- .../amberexception/BreakpointException.scala | 24 - .../amber/engine/common/rpc/AsyncRPCClient.scala | 8 +- .../apache/texera/web/ComputingUnitMaster.scala | 18 +- .../apache/texera/web/TexeraWebApplication.scala | 1 + .../apache/texera/web/auth/UserAuthenticator.scala | 39 +- .../apache/texera/web/resource/GmailResource.scala | 9 +- .../web/resource/HuggingFaceModelResource.scala | 750 + .../web/resource/SyncExecutionResource.scala | 915 + .../web/resource/dashboard/hub/HubResource.scala | 33 +- .../user/project/ProjectAccessResource.scala | 15 +- .../pythonvirtualenvironment/PveManager.scala | 452 + .../pythonvirtualenvironment/PveResource.scala | 122 + .../PveWebsocketResource.scala | 96 + .../service/ExecutionReconfigurationService.scala | 136 +- .../web/service/ExecutionResultService.scala | 54 +- .../service/ExecutionsMetadataPersistService.scala | 2 +- .../texera/web/service/WorkflowService.scala | 2 +- .../org/apache/texera/workflow/LogicalLink.scala | 13 + .../e2e/ReconfigurationIntegrationSpec.scala | 267 + .../IcebergRestCatalogIntegrationSpec.scala | 82 + .../apache/texera/amber/tags/IntegrationTest.java | 52 + .../handlers/control/test_debug_command_handler.py | 195 + .../control/test_evaluate_expression_handler.py | 156 + .../control/test_replay_current_tuple_handler.py | 139 + .../control/test_update_executor_handler.py | 89 + .../managers/test_console_message_manager.py | 83 + .../architecture/managers/test_debug_manager.py | 116 + .../test_embedded_control_message_manager.py | 194 + .../managers/test_exception_manager.py | 76 + .../architecture/managers/test_executor_manager.py | 189 +- .../architecture/managers/test_pause_manager.py | 0 .../architecture/managers/test_state_manager.py | 20 +- .../managers/test_state_processing_manager.py | 59 + .../managers/test_statistics_manager.py | 147 + .../managers/test_tuple_processing_manager.py | 100 + .../architecture/packaging/test_output_manager.py | 107 + .../packaging/test_state_materialization_e2e.py | 258 + .../core/architecture/rpc/test_async_rpc_client.py | 259 + .../sendsemantics/test_partitioners.py | 434 + .../python/core/models/schema/test_schema.py | 0 amber/src/test/python/core/models/test_operator.py | 268 + amber/src/test/python/core/models/test_state.py | 101 + .../python/core/models/test_table.py | 0 .../python/core/models/test_tuple.py | 0 .../python/core/models/type/test_large_binary.py | 0 .../python/core/proxy/test_proxy_client.py | 4 +- .../python/core/proxy/test_proxy_server.py | 21 +- .../python/core/runnables/test_console_message.py | 0 .../python/core/runnables/test_data_processor.py | 207 + .../test/python/core/runnables/test_heartbeat.py | 113 + .../python/core/runnables/test_main_loop.py | 603 +- .../python/core/runnables/test_network_receiver.py | 39 +- .../python/core/runnables/test_network_sender.py | 0 .../core/storage/iceberg/test_iceberg_document.py | 117 +- .../iceberg/test_iceberg_iterator_error_paths.py} | 31 +- .../test_iceberg_rest_catalog_integration.py | 66 + .../storage/iceberg/test_iceberg_utils_catalog.py | 0 .../iceberg/test_iceberg_utils_large_binary.py | 0 ...t_input_port_materialization_reader_runnable.py | 99 + .../python/core/storage/test_document_factory.py | 172 + amber/src/test/python/core/test_python_worker.py | 125 + .../util/console_message/test_replace_print.py | 135 + .../core/util/customized_queue/test_inner.py | 182 + .../test_linked_blocking_multi_queue.py | 0 amber/src/test/python/core/util/test_atomic.py | 119 + .../python/core/util}/test_expression_evaluator.py | 2 +- .../test/python/core/util/test_virtual_identity.py | 172 + .../pytexera/storage/test_dataset_file_document.py | 216 + .../storage/test_large_binary_input_stream.py | 12 +- .../pytexera/storage/test_large_binary_manager.py | 1 + .../storage/test_large_binary_output_stream.py | 143 +- .../udf/examples/test_count_batch_operator.py | 2 +- .../pytexera/udf/examples/test_echo_operator.py | 2 +- .../udf/examples/test_echo_table_operator.py | 2 +- .../udf/examples/test_generator_operator_binary.py | 2 +- .../examples/test_generator_operator_integer.py | 2 +- .../breakpoint/ExceptionBreakpointSpec.scala | 298 - .../common/ProcessingStepCursorSpec.scala | 66 + .../architecture/control/TrivialControlSpec.scala | 45 +- .../control/utils/TrivialControlTester.scala | 14 +- .../controller/GlobalReplayManagerSpec.scala | 104 + .../controller/WorkflowSchedulerSpec.scala | 103 + .../controller/execution/ExecutionUtilsSpec.scala | 340 + .../controller/execution/LinkExecutionSpec.scala | 66 + .../execution/WorkerPortExecutionSpec.scala | 41 +- .../execution/WorkflowExecutionSpec.scala | 167 + .../deploysemantics/AddressInfoSpec.scala | 84 + .../deploystrategy/DeployStrategiesSpec.scala | 133 + .../layer/WorkerExecutionSpec.scala | 106 + .../logreplay/EmptyReplayLogManagerImplSpec.scala | 101 + .../logreplay/LogreplayPrimitivesSpec.scala | 409 + .../messaginglayer/AmberFIFOChannelSpec.scala | 203 + .../messaginglayer/CongestionControlSpec.scala | 249 + .../messaginglayer/FlowControlSpec.scala | 212 + .../messaginglayer/NetworkInputGatewaySpec.scala | 12 + .../messaginglayer/OrderingEnforcerSpec.scala | 150 + .../messaginglayer/WorkerPortSpec.scala | 67 + .../scheduling/RegionCoordinatorTestSupport.scala | 241 + .../RegionExecutionCoordinatorSpec.scala | 202 + .../architecture/scheduling/RegionPlanSpec.scala | 268 + .../architecture/scheduling/RegionSpec.scala | 124 + .../architecture/scheduling/ScheduleSpec.scala | 89 + .../scheduling/SchedulingUtilsSpec.scala | 140 + .../WorkflowExecutionCoordinatorSpec.scala | 263 + .../scheduling/config/ChannelConfigSpec.scala | 211 + .../scheduling/config/LinkConfigSpec.scala | 264 + .../scheduling/config/SchedulingConfigsSpec.scala | 311 + .../resourcePolicies/ResourcePoliciesSpec.scala | 174 + .../partitioners/NetworkOutputBufferSpec.scala | 274 + .../partitioners/PartitionersSpec.scala | 217 + .../engine/architecture/worker/PauseTypeSpec.scala | 145 + .../engine/architecture/worker/WorkerSpec.scala | 6 +- .../OutputPortStorageWriterThreadSpec.scala | 151 + .../worker/managers/WorkerManagersSpec.scala | 272 + .../worker/promisehandlers/EndHandlerSpec.scala | 113 + .../engine/common/CheckpointSubsystemSpec.scala | 177 + .../texera/amber/engine/common/UtilsSpec.scala | 156 + .../ambermessage/AmberMessageEnvelopesSpec.scala | 150 + .../common/ambermessage/DataPayloadSpec.scala | 84 + .../common/statetransition/StateManagerSpec.scala | 151 + .../statetransition/WorkerStateManagerSpec.scala | 131 + .../amber/engine/e2e/DataProcessingSpec.scala | 4 +- .../apache/texera/amber/engine/e2e/PauseSpec.scala | 18 +- .../{PauseSpec.scala => ReconfigurationSpec.scala} | 119 +- .../apache/texera/amber/engine/e2e/TestUtils.scala | 132 +- .../engine/faulttolerance/CheckpointSpec.scala | 23 +- .../amber/engine/faulttolerance/LoggingSpec.scala | 2 +- .../apache/texera/amber/error/ErrorUtilsSpec.scala | 207 + .../texera/web/SubscriptionManagerSpec.scala | 83 + .../org/apache/texera/web/WebsocketInputSpec.scala | 116 + .../texera/web/auth/UserAuthenticatorSpec.scala | 78 + .../texera/web/auth/UserRoleAuthorizerSpec.scala | 60 + .../texera/web/resource/GmailResourceSpec.scala | 74 + .../resource/HuggingFaceModelResourceSpec.scala | 731 + .../user/project/ProjectAccessResourceSpec.scala | 132 + .../workflow/WorkflowExecutionsResourceSpec.scala | 601 +- .../pythonvirtualenvironment/PveResourceSpec.scala | 176 + .../ExecutionReconfigurationServiceSpec.scala | 149 + .../web/service/ExecutionResultServiceSpec.scala | 24 +- .../ExecutionsMetadataPersistServiceSpec.scala | 310 + .../web/storage/ExecutionStateStoreSpec.scala | 56 + .../apache/texera/web/storage/StateStoreSpec.scala | 161 + .../web/storage/WorkflowStateStoreSpec.scala | 24 +- .../apache/texera/workflow/LogicalLinkSpec.scala | 291 + .../texera/workflow/WorkflowCompilerSpec.scala | 212 + ...quirements.txt => system-requirements-lock.txt} | 104 +- bin/access-control-service.dockerfile | 31 +- bin/agent-service.dockerfile | 50 + bin/computing-unit-master.dockerfile | 64 +- bin/computing-unit-worker.dockerfile | 62 +- bin/config-service.dockerfile | 31 +- bin/file-service.dockerfile | 31 +- bin/fix-format.sh | 11 +- bin/k8s/Chart.yaml | 5 + bin/k8s/files/texera_lakekeeper.sql | 1 + bin/k8s/templates/external-names.yaml | 8 +- bin/k8s/templates/gateway-routes.yaml | 14 + bin/k8s/templates/lakekeeper-init-job.yaml | 137 + .../templates/postgresql-init-script-config.yaml | 6 + bin/k8s/templates/webserver-deployment.yaml | 11 + ...workflow-computing-unit-manager-deployment.yaml | 38 +- bin/k8s/values.yaml | 32 + bin/licensing/audit_jar_licenses.py | 193 + bin/licensing/check_binary_deps.py | 647 + bin/licensing/concat_license_binary.py | 327 + bin/licensing/test_check_binary_deps.py | 485 + bin/protoc-version.txt | 1 + bin/pylsp/Dockerfile | 9 + bin/python-proto-gen.sh | 12 +- bin/single-node/.env | 14 + bin/single-node/DISCLAIMER | 8 + LICENSE => bin/single-node/LICENSE | 37 - bin/single-node/README.md | 71 +- bin/single-node/docker-compose.yml | 46 +- .../single-node/litellm-config.yaml | 18 +- bin/single-node/nginx.conf | 27 + bin/texera-web-application.dockerfile | 69 +- bin/workflow-compiling-service.dockerfile | 31 +- ...flow-computing-unit-managing-service.dockerfile | 31 +- bin/y-websocket-server/Dockerfile | 9 + build.sbt | 61 +- codecov.yml | 83 + common/auth/build.sbt | 4 +- .../org/apache/texera/auth/JwtAuthFilter.scala | 30 +- .../scala/org/apache/texera/auth/JwtParser.scala | 62 +- .../apache/texera/auth/UserActivityTracker.scala | 169 + .../org/apache/texera/auth/JwtParserSpec.scala | 200 + .../texera/auth/UserActivityTrackerSpec.scala | 143 + common/config/src/main/resources/default.conf | 3 + common/config/src/main/resources/gui.conf | 4 + common/config/src/main/resources/storage.conf | 6 +- .../amber/config/EnvironmentalVariable.scala | 1 + .../config/{AkkaConfig.scala => PekkoConfig.scala} | 4 +- .../config/{AkkaConfig.scala => PythonUtils.scala} | 15 +- .../apache/texera/amber/config/StorageConfig.scala | 3 + .../scala/org/apache/texera/config/GuiConfig.scala | 2 + common/dao/build.sbt | 3 +- .../scala/org/apache/texera/dao/SiteSettings.scala | 57 + .../scala/org/apache/texera/dao/SqlServer.scala | 50 +- .../scala/org/apache/texera/dao/MockTexeraDB.scala | 1 - .../org/apache/texera/dao/SiteSettingsSpec.scala | 45 + .../org/apache/texera/dao/SqlServerSpec.scala | 234 + .../texera/amber/pybuilder/BoundaryValidator.scala | 96 +- .../amber/pybuilder/EncodableInspector.scala | 56 +- .../amber/pybuilder/PythonTemplateBuilder.scala | 346 +- .../amber/pybuilder/BoundaryValidatorSpec.scala | 72 + .../pybuilder/PythonTemplateBuilderApiSpec.scala | 247 + .../pybuilder/PythonTemplateBuilderSpec.scala | 50 +- common/workflow-core/build.sbt | 2 +- .../org/apache/texera/amber/core/state/State.scala | 91 +- .../amber/core/storage/DocumentFactory.scala | 56 +- .../texera/amber/core/storage/FileResolver.scala | 6 +- .../texera/amber/core/storage/VFSURIFactory.scala | 26 +- .../storage/result/iceberg/IcebergDocument.scala | 245 +- .../core/storage/util/LakeFSStorageClient.scala | 2 +- .../org/apache/texera/amber/core/tuple/Tuple.scala | 2 +- .../texera/amber/core/workflow/PartitionInfo.scala | 1 + .../texera/amber/core/workflow/PhysicalOp.scala | 8 +- .../amber/core/workflow/WorkflowContext.scala | 3 +- .../org/apache/texera/amber/util/ArrowUtils.scala | 45 +- .../org/apache/texera/amber/util/IcebergUtil.scala | 44 +- .../org/apache/texera/amber/util/JSONUtils.scala | 2 + .../texera/amber/util/VirtualIdentityUtils.scala | 12 +- .../amber/util/serde/GlobalPortIdentitySerde.scala | 19 +- .../texera/service/util/LargeBinaryManager.scala | 2 +- .../texera/service/util/S3StorageClient.scala | 6 +- .../amber/core/WorkflowRuntimeExceptionSpec.scala | 83 + .../core/executor/CoreExecutorReflectionSpec.scala | 229 + .../apache/texera/amber/core/state/StateSpec.scala | 131 + .../amber/core/storage/VFSURIFactorySpec.scala | 116 + .../amber/core/storage/util/StorageUtilSpec.scala | 74 + .../amber/core/tuple/InternalMarkerSpec.scala | 76 + .../texera/amber/core/tuple/TupleUtilsSpec.scala | 110 + .../amber/core/workflow/PartitionInfoSpec.scala | 199 + .../amber/core/workflow/WorkflowContextSpec.scala | 62 + .../core/workflow/WorkflowCoreTypesSpec.scala | 340 + .../result/iceberg/IcebergDocumentSpec.scala | 168 +- .../result/iceberg/IcebergTableStatsSpec.scala | 22 +- .../apache/texera/amber/util/ArrowUtilsSpec.scala | 285 + .../apache/texera/amber/util/IcebergUtilSpec.scala | 10 + .../apache/texera/amber/util/JSONUtilsSpec.scala | 168 + .../amber/util/VirtualIdentityUtilsSpec.scala | 166 + .../amber/util/serde/PortIdentitySerdeSpec.scala | 320 + .../jsonSchema/annotations/JsonSchemaInject.java | 4 +- .../texera/amber/operator/TestOperators.scala | 40 +- .../amber/operator/ifStatement/IfOpExec.scala | 3 +- .../sklearn/SklearnDummyClassifierOpDesc.scala | 2 +- .../training/SklearnTrainingBaggingOpDesc.scala | 2 +- .../SklearnTrainingDummyClassifierOpDesc.scala | 2 +- .../operator/source/fetcher/URLFetcherOpDesc.scala | 6 +- .../source/scan/csv/CSVScanSourceOpDesc.scala | 13 +- .../source/scan/csv/CSVScanSourceOpExec.scala | 46 +- .../scan/csv/ParallelCSVScanSourceOpDesc.scala | 7 +- .../scan/csvOld/CSVOldScanSourceOpDesc.scala | 7 +- .../source/scan/json/JSONLScanSourceOpDesc.scala | 2 +- .../source/scan/json/JSONLScanSourceOpExec.scala | 3 +- .../amber/operator/source/scan/json/JSONUtil.scala | 72 - .../operator/source/sql/SQLSourceOpExec.scala | 28 +- .../sql/asterixdb/AsterixDBSourceOpExec.scala | 2 +- .../python/DualInputPortsPythonUDFOpDescV2.scala | 25 + .../operator/udf/python/PythonUDFOpDescV2.scala | 24 + .../python/source/PythonUDFSourceOpDescV2.scala | 25 + .../visualization/DotPlot/DotPlotOpDesc.scala | 1 - .../IcicleChart/IcicleChartOpDesc.scala | 1 - .../ImageViz/ImageVisualizerOpDesc.scala | 1 - .../ScatterMatrixChartOpDesc.scala | 1 - .../visualization/barChart/BarChartOpDesc.scala | 1 - .../boxViolinPlot/BoxViolinPlotOpDesc.scala | 1 - .../bubbleChart/BubbleChartOpDesc.scala | 1 - .../candlestickChart/CandlestickChartOpDesc.scala | 1 - .../ContinuousErrorBandsOpDesc.scala | 1 - .../contourPlot/ContourPlotOpDesc.scala | 1 - .../dumbbellPlot/DumbbellPlotOpDesc.scala | 3 +- .../FigureFactoryTableOpDesc.scala | 1 - .../filledAreaPlot/FilledAreaPlotOpDesc.scala | 3 +- .../funnelPlot/FunnelPlotOpDesc.scala | 1 - .../ganttChart/GanttChartOpDesc.scala | 1 - .../visualization/heatMap/HeatMapOpDesc.scala | 1 - .../hierarchychart/HierarchyChartOpDesc.scala | 1 - .../histogram/HistogramChartOpDesc.scala | 1 - .../visualization/lineChart/LineChartOpDesc.scala | 4 +- .../networkGraph/NetworkGraphOpDesc.scala | 1 - .../visualization/pieChart/PieChartOpDesc.scala | 1 - .../quiverPlot/QuiverPlotOpDesc.scala | 1 - .../sankeyDiagram/SankeyDiagramOpDesc.scala | 1 - .../scatter3DChart/Scatter3dChartOpDesc.scala | 1 - .../scatterplot/ScatterplotOpDesc.scala | 1 - .../tablesChart/TablesPlotOpDesc.scala | 1 - .../ternaryPlot/TernaryPlotOpDesc.scala | 1 - .../volcanoPlot/VolcanoPlotOpDesc.scala | 1 - .../waterfallChart/WaterfallChartOpDesc.scala | 1 - .../amber/operator/aggregate/AggregateOpSpec.scala | 35 + .../aggregate/AggregationOperationSpec.scala | 186 + .../amber/operator/flatmap/FlatMapOpExecSpec.scala | 118 + .../amber/operator/ifStatement/IfOpExecSpec.scala | 132 + .../operator/intersect/IntersectOpDescSpec.scala | 82 + .../amber/operator/limit/LimitOpExecSpec.scala | 85 + .../texera/amber/operator/map/MapOpExecSpec.scala | 125 + .../metadata/OperatorBooleanDefaultSpec.scala | 53 + .../amber/operator/sink/ProgressiveUtilsSpec.scala | 219 + .../sklearn/SklearnOpDescRegistrySpec.scala | 398 + .../amber/operator/sleep/SleepOpDescSpec.scala | 81 + .../operator/sort/StableMergeSortOpExecSpec.scala | 2 +- .../source/fetcher/URLFetcherOpDescSpec.scala | 106 + .../source/fetcher/URLFetcherOpExecSpec.scala | 3 - .../source/scan/csv/CSVScanSourceOpDescSpec.scala | 50 + .../source/scan/csv/CSVScanSourceOpExecSpec.scala | 110 + .../ImageViz/ImageVisualizerOpDescSpec.scala | 59 +- .../barChart/BarChartOpDescSpec.scala | 62 +- .../bulletChart/BulletChartOpDescSpec.scala | 119 + .../funnelPlot/FunnelPlotOpDescSpec.scala | 95 + .../visualization/heatMap/HeatMapOpDescSpec.scala | 75 + .../lineChart/LineChartOpDescSpec.scala | 129 + .../pieChart/PieChartOpDescSpec.scala | 70 +- .../volcanoPlot/VolcanoPlotOpDescSpec.scala | 95 + computing-unit-managing-service/LICENSE-binary | 652 + computing-unit-managing-service/NOTICE-binary | 1822 ++ computing-unit-managing-service/build.sbt | 17 + .../service/ComputingUnitManagingService.scala | 29 +- .../resource/ComputingUnitManagingResource.scala | 2 +- .../ComputingUnitManagingServiceRunSpec.scala | 48 + LICENSE => config-service/LICENSE-binary | 253 +- config-service/NOTICE-binary | 1546 ++ config-service/build.sbt | 10 + .../org/apache/texera/service/ConfigService.scala | 26 +- .../texera/service/resource/ConfigResource.scala | 26 +- .../texera/service/ConfigServiceRunSpec.scala | 48 + .../service/resource/ConfigResourceAuthSpec.scala | 187 + docs/_index.md | 28 + docs/concepts/_index.md | 38 + docs/contribution-guidelines/_index.md | 160 + .../apache-license-header.md | 32 + .../guide-for-developers.md | 357 + .../guide-to-frontend-development.md | 52 + .../guide-to-implement-java-operator.md | 283 + .../guide-to-implement-python-operator.md | 91 + .../making-contributions.md | 12 + .../micro-services-local-dev.md | 58 + .../release-email-template.md | 90 + docs/examples/_index.md | 58 + docs/getting-started/AccessLogin.md | 39 + docs/getting-started/_index.md | 20 + docs/getting-started/install-texera.md | 10 + .../getting-started/installing-using-docker.md | 27 +- docs/getting-started/run-on-kubernetes.md | 137 + docs/getting-started/ui-overview.md | 32 + docs/overview.md | 43 + docs/reference/_index.md | 62 + docs/reference/configuration.md | 5 + docs/reference/engine.md | 5 + docs/reference/frontend.md | 5 + docs/reference/operators/_index.md | 47 + docs/reference/operators/control-block/_index.md | 18 + docs/reference/operators/control-block/if.md | 22 + docs/reference/operators/control-block/sleep.md | 21 + docs/reference/operators/data-cleaning/_index.md | 28 + .../operators/data-cleaning/aggregate/_index.md | 17 + .../operators/data-cleaning/aggregate/aggregate.md | 25 + docs/reference/operators/data-cleaning/distinct.md | 15 + docs/reference/operators/data-cleaning/filter.md | 24 + .../operators/data-cleaning/join/_index.md | 19 + .../data-cleaning/join/cartesian-product.md | 15 + .../operators/data-cleaning/join/hash-join.md | 23 + .../operators/data-cleaning/join/interval-join.md | 26 + docs/reference/operators/data-cleaning/limit.md | 21 + .../operators/data-cleaning/projection.md | 24 + .../operators/data-cleaning/set/_index.md | 20 + .../operators/data-cleaning/set/difference.md | 15 + .../operators/data-cleaning/set/intersect.md | 15 + .../data-cleaning/set/symmetricdifference.md | 15 + .../reference/operators/data-cleaning/set/union.md | 15 + .../operators/data-cleaning/sort/_index.md | 19 + .../data-cleaning/sort/sort-partitions.md | 23 + .../reference/operators/data-cleaning/sort/sort.md | 23 + .../data-cleaning/sort/stable-merge-sort.md | 23 + .../operators/data-cleaning/type-casting.md | 23 + docs/reference/operators/data-input/_index.md | 24 + .../operators/data-input/arrow-file-scan.md | 23 + .../operators/data-input/csv-file-scan.md | 26 + .../operators/data-input/csvold-file-scan.md | 26 + docs/reference/operators/data-input/file-lister.md | 21 + .../operators/data-input/file-scan-from-input.md | 27 + docs/reference/operators/data-input/file-scan.md | 28 + .../operators/data-input/jsonl-file-scan.md | 25 + docs/reference/operators/data-input/text-input.md | 25 + .../operators/database-connector/_index.md | 19 + .../database-connector/asterixdb-source.md | 45 + .../operators/database-connector/mysql-source.md | 36 + .../database-connector/postgresql-source.md | 36 + docs/reference/operators/external-api/_index.md | 20 + .../operators/external-api/reddit-search.md | 25 + .../twitter-full-archive-search-api.md | 27 + .../operators/external-api/twitter-search-api.md | 25 + .../operators/external-api/url-fetcher.md | 22 + .../reference/operators/machine-learning/_index.md | 17 + .../machine-learning/advanced-sklearn/_index.md | 20 + .../advanced-sklearn/knn-classifier.md | 23 + .../advanced-sklearn/knn-regressor.md | 23 + .../advanced-sklearn/svm-classifier.md | 23 + .../advanced-sklearn/svm-regressor.md | 23 + .../machine-learning/hugging-face/_index.md | 20 + .../hugging-face-iris-logistic-regression.md | 24 + .../hugging-face-sentiment-analysis.md | 24 + .../hugging-face/hugging-face-spam-detection.md | 23 + .../hugging-face-text-summarization.md | 22 + .../machine-learning-general/_index.md | 17 + .../machine-learning-scorer.md | 25 + .../operators/machine-learning/sklearn/_index.md | 48 + .../machine-learning/sklearn/adaptive-boosting.md | 24 + .../operators/machine-learning/sklearn/bagging.md | 24 + .../sklearn/bernoulli-naive-bayes.md | 24 + .../sklearn/complement-naive-bayes.md | 24 + .../machine-learning/sklearn/decision-tree.md | 24 + .../machine-learning/sklearn/dummy-classifier.md | 24 + .../machine-learning/sklearn/extra-tree.md | 24 + .../machine-learning/sklearn/extra-trees.md | 24 + .../sklearn/gaussian-naive-bayes.md | 24 + .../machine-learning/sklearn/gradient-boosting.md | 24 + .../sklearn/k-nearest-neighbors.md | 24 + .../machine-learning/sklearn/linear-perceptron.md | 24 + .../machine-learning/sklearn/linear-regression.md | 22 + .../sklearn/linear-support-vector-machine.md | 24 + .../logistic-regression-cross-validation.md | 24 + .../sklearn/logistic-regression.md | 24 + .../sklearn/multi-layer-perceptron.md | 24 + .../sklearn/multinomial-naive-bayes.md | 24 + .../machine-learning/sklearn/nearest-centroid.md | 24 + .../machine-learning/sklearn/passive-aggressive.md | 24 + .../sklearn/probability-calibration.md | 24 + .../machine-learning/sklearn/random-forest.md | 24 + .../sklearn/ridge-regression-cross-validation.md | 24 + .../machine-learning/sklearn/ridge-regression.md | 24 + .../machine-learning/sklearn/sklearn-prediction.md | 23 + .../machine-learning/sklearn/sklearn-testing.md | 23 + .../sklearn/sklearn-training/_index.md | 42 + .../sklearn-training/training-adaptive-boosting.md | 24 + .../sklearn-training/training-bagging-training.md | 24 + .../training-bernoulli-naive-bayes.md | 24 + .../training-complement-naive-bayes.md | 24 + .../sklearn-training/training-decision-tree.md | 24 + .../sklearn-training/training-dummy-classifier.md | 24 + .../sklearn-training/training-extra-tree.md | 24 + .../sklearn-training/training-extra-trees.md | 24 + .../training-gaussian-naive-bayes.md | 24 + .../sklearn-training/training-gradient-boosting.md | 24 + .../training-k-nearest-neighbors.md | 24 + .../sklearn-training/training-linear-perceptron.md | 24 + .../sklearn-training/training-linear-regression.md | 24 + .../training-linear-support-vector-machine.md | 24 + ...raining-logistic-regression-cross-validation.md | 24 + .../training-logistic-regression.md | 24 + .../training-multi-layer-perceptron.md | 24 + .../training-multinomial-naive-bayes.md | 24 + .../sklearn-training/training-nearest-centroid.md | 24 + .../training-passive-aggressive.md | 24 + .../training-probability-calibration.md | 24 + .../sklearn-training/training-random-forest.md | 24 + .../training-ridge-regression-cross-validation.md | 24 + .../sklearn-training/training-ridge-regression.md | 24 + .../training-stochastic-gradient-descent.md | 24 + .../training-support-vector-machine.md | 24 + .../sklearn/stochastic-gradient-descent.md | 24 + .../sklearn/support-vector-machine.md | 24 + docs/reference/operators/output-modes.md | 24 + docs/reference/operators/parameters/_index.md | 17 + .../parameters/sklearn-advanced-knn-parameters.md | 27 + .../parameters/sklearn-advanced-svc-parameters.md | 26 + .../parameters/sklearn-advanced-svr-parameters.md | 30 + docs/reference/operators/search/_index.md | 20 + .../operators/search/dictionary-matcher.md | 24 + docs/reference/operators/search/keyword-search.md | 22 + .../operators/search/regular-expression.md | 23 + .../reference/operators/search/substring-search.md | 23 + .../operators/user-defined-functions/_index.md | 16 + .../user-defined-functions/java/_index.md | 17 + .../user-defined-functions/java/java-udf.md | 48 + .../python/1-out-python-udf.md | 40 + .../python/2-in-python-udf.md | 56 + .../user-defined-functions/python/_index.md | 21 + .../python/python-lambda-function.md | 24 + .../python/python-table-reducer.md | 24 + .../user-defined-functions/python/python-udf.md | 56 + .../user-defined-functions/r/1-out-r-udf.md | 43 + .../operators/user-defined-functions/r/_index.md | 18 + .../operators/user-defined-functions/r/r-udf.md | 44 + docs/reference/operators/utilities/_index.md | 20 + .../operators/utilities/random-k-sampling.md | 21 + .../operators/utilities/reservoir-sampling.md | 21 + docs/reference/operators/utilities/split.md | 24 + .../reference/operators/utilities/unnest-string.md | 23 + docs/reference/operators/visualization/_index.md | 26 + .../operators/visualization/advanced/_index.md | 18 + .../visualization/advanced/choropleth-map.md | 22 + .../visualization/advanced/scatter3d-chart.md | 23 + .../operators/visualization/basic/_index.md | 32 + .../operators/visualization/basic/bar-chart.md | 25 + .../operators/visualization/basic/bubble-chart.md | 25 + .../operators/visualization/basic/dot-plot.md | 21 + .../operators/visualization/basic/dumbbell-plot.md | 28 + .../visualization/basic/figure-factory-table.md | 25 + .../visualization/basic/filled-area-plot.md | 26 + .../operators/visualization/basic/gantt-chart.md | 25 + .../visualization/basic/hierarchy-chart.md | 24 + .../operators/visualization/basic/icicle-chart.md | 23 + .../operators/visualization/basic/line-chart.md | 28 + .../operators/visualization/basic/pie-chart.md | 22 + .../operators/visualization/basic/range-slider.md | 23 + .../visualization/basic/sankey-diagram.md | 23 + .../operators/visualization/basic/scatter-plot.md | 27 + .../operators/visualization/basic/tables-plot.md | 22 + .../visualization/basic/time-series-plot.md | 26 + .../operators/visualization/financial/_index.md | 21 + .../visualization/financial/bullet-chart.md | 26 + .../visualization/financial/candlestick-chart.md | 25 + .../visualization/financial/funnel-plot.md | 23 + .../visualization/financial/gauge-chart.md | 26 + .../visualization/financial/waterfall-chart.md | 22 + .../operators/visualization/media/_index.md | 20 + .../visualization/media/html-visualizer.md | 21 + .../visualization/media/image-visualizer.md | 21 + .../visualization/media/url-visualizer.md | 21 + .../operators/visualization/media/word-cloud.md | 22 + .../operators/visualization/nested-table.md | 24 + .../operators/visualization/scientific/_index.md | 30 + .../visualization/scientific/carpet-plot.md | 23 + .../visualization/scientific/contour-plot.md | 26 + .../visualization/scientific/dendrogram.md | 24 + .../operators/visualization/scientific/heatmap.md | 23 + .../visualization/scientific/network-graph.md | 23 + .../scientific/parallel-coordinates-plot.md | 22 + .../visualization/scientific/polar-chart.md | 22 + .../visualization/scientific/quiver-plot.md | 24 + .../visualization/scientific/radar-chart.md | 23 + .../visualization/scientific/radar-plot.md | 28 + .../visualization/scientific/ternary-contour.md | 24 + .../visualization/scientific/ternary-plot.md | 25 + .../visualization/scientific/volcano-plot.md | 22 + .../visualization/scientific/wind-rose-chart.md | 23 + .../operators/visualization/statistical/_index.md | 24 + .../visualization/statistical/boxviolin-plot.md | 24 + .../statistical/continuous-error-bands.md | 31 + .../empirical-cumulative-distribution-plot.md | 28 + .../visualization/statistical/histogram.md | 25 + .../visualization/statistical/histogram2d.md | 25 + .../statistical/scatter-matrix-chart.md | 22 + .../visualization/statistical/strip-chart.md | 24 + .../visualization/statistical/tree-plot.md | 21 + docs/reference/project-structure.md | 5 + docs/reference/storage.md | 5 + docs/security/_index.md | 242 + docs/system-screenshot.png | Bin 0 -> 1856289 bytes docs/tutorials/_index.md | 68 + docs/tutorials/create-dataset-upload-data.md | 35 + docs/tutorials/guide-for-how-to-use-texera.md | 46 + docs/tutorials/guide-to-enable-llm-agent.md | 96 + docs/tutorials/guide-to-launch-lakekeeper.md | 119 + docs/tutorials/guide-to-use-python-udf.md | 164 + docs/tutorials/migrate-jupyter-notebook.md | 190 + file-service/LICENSE-binary | 613 + file-service/NOTICE-binary | 1807 ++ file-service/build.sbt | 10 + .../texera/service/resource/DatasetResource.scala | 133 +- .../type/serde/DatasetFileNodeSerializer.java | 4 +- .../service/resource/DatasetResourceSpec.scala | 189 +- .../type/serde/DatasetFileNodeSerializerSpec.scala | 100 + frontend/.eslintrc.json | 49 +- frontend/.gitignore | 6 + frontend/AGENTS.md | 37 + LICENSE => frontend/LICENSE-binary | 215 +- frontend/README.md | 49 +- frontend/TESTING.md | 389 + frontend/angular.json | 85 +- frontend/custom-webpack.config.js | 25 + frontend/karma.conf.js | 70 - frontend/nx.json | 3 - frontend/package.json | 108 +- frontend/proxy.config.json | 21 +- frontend/src/app/app.component.ts | 1 + frontend/src/app/app.module.ts | 173 +- frontend/src/app/common/formly/array.type.ts | 17 +- .../collab-wrapper/collab-wrapper.component.ts | 2 + frontend/src/app/common/formly/formly-config.ts | 2 + frontend/src/app/common/formly/multischema.type.ts | 4 +- frontend/src/app/common/formly/object.type.ts | 4 +- .../preset-wrapper.component.spec.ts | 574 +- .../preset-wrapper/preset-wrapper.component.ts | 20 + .../formly/repeat-dnd/repeat-dnd.component.ts | 22 +- .../app/common/service/gmail/gmail.service.spec.ts | 66 + .../src/app/common/service/gmail/gmail.service.ts | 3 +- .../app/common/service/gui-config.service.mock.ts | 5 + .../app/common/service/gui-config.service.spec.ts | 194 + .../src/app/common/service/gui-config.service.ts | 86 +- .../service/notification/notification.service.ts | 4 +- .../user/config/user-config.service.spec.ts | 316 +- .../registration-request-modal.component.ts | 4 + .../app/common/service/user/user.service.spec.ts | 64 + .../src/app/common/service/user/user.service.ts | 29 +- .../workflow-persist.service.spec.ts | 38 +- frontend/src/app/common/type/gui-config.ts | 2 + .../src/app/common/util/computing-unit.util.ts | 1 + frontend/src/app/common/util/format.util.spec.ts | 140 + frontend/src/app/common/util/format.util.ts | 36 + .../app/common/util/size-formatter.util.spec.ts | 2 +- .../execution/admin-execution.component.spec.ts | 11 +- .../admin/execution/admin-execution.component.ts | 41 +- .../component/admin/gmail/admin-gmail.component.ts | 26 +- .../admin/settings/admin-settings.component.html | 76 +- .../admin/settings/admin-settings.component.scss | 11 + .../settings/admin-settings.component.spec.ts | 12 +- .../admin/settings/admin-settings.component.ts | 63 +- .../component/admin/user/admin-user.component.html | 2 +- .../admin/user/admin-user.component.spec.ts | 12 +- .../component/admin/user/admin-user.component.ts | 53 +- .../dashboard/component/dashboard.component.html | 12 + .../dashboard/component/dashboard.component.scss | 21 + .../component/dashboard.component.spec.ts | 45 +- .../app/dashboard/component/dashboard.component.ts | 39 +- .../files-uploader/files-uploader.component.ts | 34 +- .../filters-instructions.component.spec.ts | 3 +- .../filters-instructions.component.ts | 4 + .../component/user/filters/filters.component.html | 4 +- .../user/filters/filters.component.spec.ts | 5 +- .../component/user/filters/filters.component.ts | 47 +- .../component/user/flarum/flarum.component.ts | 4 +- .../user/list-item/list-item.component.html | 4 +- .../user/list-item/list-item.component.spec.ts | 62 +- .../user/list-item/list-item.component.ts | 66 +- .../markdown-description.component.ts | 21 + .../user/search-bar/search-bar.component.spec.ts | 247 + .../user/search-bar/search-bar.component.ts | 14 + .../search-results/search-results.component.ts | 22 + .../component/user/search/search.component.ts | 19 +- .../share-access/share-access.component.spec.ts | 515 + .../user/share-access/share-access.component.ts | 51 +- .../user/sort-button/sort-button.component.ts | 19 + .../user/user-avatar/user-avatar.component.spec.ts | 4 +- .../user/user-avatar/user-avatar.component.ts | 5 + .../user-computing-unit-list-item.component.html | 2 +- .../user-computing-unit-list-item.component.ts | 55 +- .../user-computing-unit.component.spec.ts | 35 +- .../user-computing-unit.component.ts | 42 +- .../dataset-detail.component.html | 218 +- .../dataset-detail.component.scss | 71 +- .../dataset-detail.component.ts | 111 +- .../user-dataset-file-renderer.component.scss | 3 +- .../user-dataset-file-renderer.component.spec.ts | 10 +- .../user-dataset-file-renderer.component.ts | 28 + .../user-dataset-staged-objects-list.component.ts | 22 + .../user-dataset-version-creator.component.ts | 23 +- .../user-dataset-version-filetree.component.ts | 17 +- .../user-dataset-list-item.component.html | 2 +- .../user-dataset-list-item.component.spec.ts | 281 + .../user-dataset-list-item.component.ts | 39 + .../user/user-dataset/user-dataset.component.html | 2 +- .../user-dataset/user-dataset.component.spec.ts | 312 + .../user/user-dataset/user-dataset.component.ts | 22 + .../user/user-icon/user-icon.component.spec.ts | 10 +- .../user/user-icon/user-icon.component.ts | 12 + .../public-project/public-project.component.ts | 31 + .../user-project-list-item.component.html | 6 +- .../user-project-list-item.component.spec.ts | 39 +- .../user-project-list-item.component.ts | 56 + .../ngbd-modal-add-project-workflow.component.ts | 19 + ...ngbd-modal-remove-project-workflow.component.ts | 19 + .../user-project-section.component.ts | 28 + .../user/user-project/user-project.component.ts | 34 + .../user/user-quota/user-quota.component.html | 4 +- .../user/user-quota/user-quota.component.spec.ts | 29 +- .../user/user-quota/user-quota.component.ts | 53 +- .../workflow-execution-history.component.html | 4 +- .../workflow-execution-history.component.ts | 74 +- .../workflow-runtime-statistics.component.html | 4 +- .../workflow-runtime-statistics.component.ts | 24 +- .../highlight-search-terms.pipe.ts | 6 +- .../user-workflow-list-item.component.spec.ts | 49 +- .../user-workflow-list-item.component.ts | 46 + .../user-workflow/user-workflow.component.html | 6 +- .../user-workflow/user-workflow.component.spec.ts | 58 +- .../user/user-workflow/user-workflow.component.ts | 34 +- .../service/user/dataset/dataset.service.spec.ts | 356 + .../service/user/dataset/dataset.service.ts | 4 + .../service/user/download/download.service.spec.ts | 396 +- .../service/user/download/download.service.ts | 2 - .../dashboard/service/user/search.service.spec.ts | 317 + .../workflow-executions.service.spec.ts | 7 + .../workflow-version.service.spec.ts | 445 +- .../app/hub/component/about/about.component.html | 83 +- .../app/hub/component/about/about.component.scss | 8 +- .../hub/component/about/about.component.spec.ts | 56 +- .../src/app/hub/component/about/about.component.ts | 4 + .../about/local-login/local-login.component.html | 4 +- .../about/local-login/local-login.component.scss | 5 +- .../about/local-login/local-login.component.ts | 31 +- .../browse-section.component.spec.ts | 2 +- .../browse-section/browse-section.component.ts | 17 + .../hub-search-result.component.html | 2 +- .../hub-search-result.component.ts | 21 +- frontend/src/app/hub/component/hub.component.ts | 7 + .../landing-page/landing-page.component.spec.ts | 213 + .../landing-page/landing-page.component.ts | 2 + .../detail/hub-workflow-detail.component.ts | 31 +- .../agent-chat/agent-chat.component.html | 424 - .../agent-chat/agent-chat.component.spec.ts | 65 - .../agent-panel/agent-chat/agent-chat.component.ts | 277 - .../component/agent-panel/agent-panel.component.ts | 200 - .../agent-interaction.component.html | 161 + .../agent-interaction.component.scss | 283 + .../agent-interaction.component.ts | 287 + .../agent-chat/agent-chat.component.html | 455 + .../agent-chat/agent-chat.component.scss | 109 +- .../agent-panel/agent-chat/agent-chat.component.ts | 758 + .../agent-panel/agent-panel.component.html | 60 +- .../agent-panel/agent-panel.component.scss | 15 + .../agent/agent-panel/agent-panel.component.ts | 400 + .../agent-registration.component.html | 13 +- .../agent-registration.component.scss | 0 .../agent-registration.component.ts | 75 +- .../react-step-detail-modal.component.html | 438 + .../react-step-detail-modal.component.scss} | 1 + .../react-step-detail-modal.component.ts | 243 + .../annotation-suggestion.component.spec.ts | 87 + .../breakpoint-condition-input.component.spec.ts | 47 +- .../breakpoint-condition-input.component.ts | 8 +- .../code-debugger.component.spec.ts | 211 +- .../code-editor-dialog/code-debugger.component.ts | 20 +- .../code-editor.component.browser.spec.ts | 324 + .../code-editor.component.spec.ts | 308 +- .../code-editor-dialog/code-editor.component.ts | 27 +- .../codearea-custom-template.component.spec.ts | 11 +- .../codearea-custom-template.component.ts | 13 + .../dataset-file-selector.component.ts | 17 + .../dataset-selection-modal.component.ts | 22 + .../dataset-version-selector.component.ts | 17 + .../left-panel/left-panel.component.spec.ts | 12 +- .../component/left-panel/left-panel.component.ts | 34 +- .../operator-label.component.spec.ts | 11 +- .../operator-label/operator-label.component.ts | 3 + .../operator-menu/operator-menu.component.spec.ts | 43 +- .../operator-menu/operator-menu.component.ts | 25 +- .../left-panel/settings/settings.component.spec.ts | 172 + .../left-panel/settings/settings.component.ts | 5 +- .../time-travel/time-travel.component.spec.ts | 12 +- .../time-travel/time-travel.component.ts | 22 + .../versions-list/versions-list.component.html | 2 +- .../versions-list/versions-list.component.spec.ts | 12 +- .../versions-list/versions-list.component.ts | 31 + .../coeditor-user-icon.component.css | 1 - .../coeditor-user-icon.component.spec.ts | 16 +- .../coeditor-user-icon.component.ts | 14 + .../workspace/component/menu/menu.component.html | 12 +- .../workspace/component/menu/menu.component.scss | 53 +- .../component/menu/menu.component.spec.ts | 901 +- .../app/workspace/component/menu/menu.component.ts | 65 +- .../computing-unit-selection.component.html | 246 + .../computing-unit-selection.component.scss | 351 +- .../computing-unit-selection.component.spec.ts | 2 +- .../computing-unit-selection.component.ts | 486 +- .../operator-property-edit-frame.component.spec.ts | 39 +- .../operator-property-edit-frame.component.ts | 130 +- .../port-property-edit-frame.component.spec.ts | 3 +- .../port-property-edit-frame.component.ts | 21 +- .../property-editor.component.spec.ts | 11 +- .../property-editor/property-editor.component.ts | 29 +- .../type-casting-display.component.spec.ts | 11 +- .../type-casting-display.component.ts | 19 + .../result-exportation.component.ts | 42 + .../console-frame/console-frame.component.scss | 9 + .../console-frame/console-frame.component.spec.ts | 11 +- .../console-frame/console-frame.component.ts | 48 + .../error-frame/error-frame.component.spec.ts | 13 +- .../error-frame/error-frame.component.ts | 13 + .../result-panel/result-panel-modal.component.ts | 2 + .../result-panel/result-panel.component.html | 19 +- .../result-panel/result-panel.component.scss | 6 + .../result-panel/result-panel.component.spec.ts | 11 +- .../result-panel/result-panel.component.ts | 35 +- .../result-table-frame.component.html | 1 + .../result-table-frame.component.spec.ts | 26 +- .../result-table-frame.component.ts | 36 +- .../ui-udf-parameters.component.html} | 32 +- .../ui-udf-parameters.component.scss} | 17 +- .../ui-udf-parameters.component.spec.ts | 117 + .../ui-udf-parameters.component.ts | 118 + .../visualization-frame-content.component.scss | 6 +- .../nz-modal-comment-box.component.ts | 41 +- .../context-menu/context-menu.component.html | 28 +- .../context-menu/context-menu.component.spec.ts | 187 +- .../context-menu/context-menu.component.ts | 13 + .../mini-map/mini-map.component.spec.ts | 12 +- .../workflow-editor/mini-map/mini-map.component.ts | 15 + .../workflow-editor/workflow-editor.component.html | 20 + .../workflow-editor/workflow-editor.component.scss | 63 +- .../workflow-editor.component.spec.ts | 146 +- .../workflow-editor/workflow-editor.component.ts | 303 +- .../workspace/component/workspace.component.html | 4 +- .../component/workspace.component.spec.ts | 379 + .../app/workspace/component/workspace.component.ts | 95 +- .../src/app/workspace/service/agent/agent-types.ts | 81 + .../app/workspace/service/agent/agent.service.ts | 1341 ++ .../ui-udf-parameters-parser.service.spec.ts | 234 + .../ui-udf-parameters-parser.service.ts | 242 + .../ui-udf-parameters-sync.service.spec.ts | 222 + .../code-editor/ui-udf-parameters-sync.service.ts | 138 + .../workflow-compiling.service.spec.ts | 242 + .../compile-workflow/workflow-compiling.service.ts | 83 + .../workspace/service/copilot/copilot-prompts.ts | 35 - .../copilot/texera-copilot-manager.service.spec.ts | 259 - .../copilot/texera-copilot-manager.service.ts | 248 - .../service/copilot/texera-copilot.spec.ts | 189 - .../workspace/service/copilot/texera-copilot.ts | 388 - .../current-workflow-editing-observing-tools.ts | 127 - .../copilot/tool/react-step-operator-parser.ts | 152 - .../service/copilot/tool/tools-utility.ts | 138 - .../copilot/tool/workflow-metadata-tools.ts | 140 - .../service/drag-drop/drag-drop.service.spec.ts | 194 +- .../dynamic-schema/dynamic-schema.service.spec.ts | 20 +- .../dynamic-schema/dynamic-schema.service.ts | 6 +- .../execute-workflow.service.spec.ts | 47 +- .../execute-workflow/execute-workflow.service.ts | 4 +- .../service/joint-ui/joint-ui.service.spec.ts | 1020 +- .../workspace/service/joint-ui/joint-ui.service.ts | 166 +- .../operator-debug/udf-debug.service.spec.ts | 53 +- .../operator-menu/operator-menu.service.spec.ts | 143 +- .../service/operator-menu/operator-menu.service.ts | 162 +- .../operator-metadata.service.spec.ts | 8 +- .../service/preset/preset.service.spec.ts | 862 +- .../service/undo-redo/undo-redo.service.spec.ts | 4 +- .../validation/validation-workflow.service.spec.ts | 4 +- .../virtual-environment.service.ts | 95 + .../model/coeditor-presence.service.spec.ts | 8 +- .../model/coeditor-presence.service.ts | 2 +- .../model/joint-graph-wrapper.spec.ts | 27 +- .../workflow-graph/model/joint-graph-wrapper.ts | 26 +- .../workflow-graph/model/sync-texera-model.spec.ts | 410 +- .../model/workflow-action.service.ts | 7 +- .../workflow-graph/model/workflow-graph.spec.ts | 20 +- .../util/workflow-util.service.spec.ts | 2 +- .../workflow-result-export.service.spec.ts | 128 +- .../workflow-result.service.spec.ts | 37 +- .../operator-reuse-cache-status.service.spec.ts | 4 +- .../workflow-websocket.service.spec.ts | 8 +- frontend/src/assets/workflow_gui.png | Bin 247141 -> 1856289 bytes frontend/src/browser-buffer-polyfill.ts | 44 + frontend/src/jsdom-svg-polyfill.ts | 202 + .../local-login.component.scss => main.test.ts} | 20 +- frontend/src/main.ts | 6 +- frontend/src/styles.scss | 322 +- frontend/src/test-zone-setup.ts | 96 + frontend/src/tsconfig.app.json | 2 +- frontend/src/tsconfig.spec.json | 10 +- frontend/src/tsconfig.test.json | 16 + ...structions.component.ts => vitest-globals.d.ts} | 12 +- frontend/tsconfig.json | 17 +- frontend/vitest.browser.config.ts | 79 + frontend/vitest.config.ts | 53 + frontend/yarn.lock | 16529 ++++++++----------- licenses-3rd-party-code/angular.md | 9 + .../mbknor-jackson-jsonschema.txt | 21 + licenses-3rd-party-code/monaco-languageclient.txt | 9 + licenses/LICENSE-0BSD.txt | 12 + licenses/LICENSE-BSD-2-Clause.txt | 23 + licenses/LICENSE-BSD-3-Clause.txt | 27 + licenses/LICENSE-CDDL-1.0.txt | 71 + licenses/LICENSE-CDDL-1.1.txt | 123 + licenses/LICENSE-EDL-1.0.txt | 29 + licenses/LICENSE-EPL-1.0.txt | 210 + licenses/LICENSE-EPL-2.0.txt | 277 + licenses/LICENSE-ISC.txt | 13 + licenses/LICENSE-MIT-CMU.txt | 36 + licenses/LICENSE-MPL-2.0.txt | 151 + licenses/LICENSE-PSF-2.0.txt | 47 + licenses/LICENSE-Unlicense.txt | 24 + LICENSE => licenses/LICENSE-avro.txt | 47 +- LICENSE => licenses/LICENSE-aws-sdk.txt | 41 +- .../LICENSE-awssdk-third-party-jackson.txt | 39 +- LICENSE => licenses/LICENSE-commons-math3.txt | 222 +- licenses/LICENSE-glassfish-hk2.txt | 637 + LICENSE => licenses/LICENSE-guice.txt | 239 +- LICENSE => licenses/LICENSE-hadoop-shaded.txt | 54 +- LICENSE => licenses/LICENSE-hadoop.txt | 95 +- licenses/LICENSE-iceberg-bundled-guava.txt | 413 + LICENSE => licenses/LICENSE-iceberg.txt | 166 +- licenses/LICENSE-jackson-afterburner.txt | 42 + licenses/LICENSE-jackson-blackbird.txt | 42 + LICENSE => licenses/LICENSE-jackson-core.txt | 39 +- licenses/LICENSE-jakarta-ee.txt | 637 + licenses/LICENSE-javax-activation.txt | 263 + licenses/LICENSE-javax-ee-cddl.txt | 263 + licenses/LICENSE-javax-mail.txt | 759 + licenses/LICENSE-jaxb-api.txt | 274 + licenses/LICENSE-jersey.txt | 637 + licenses/LICENSE-jetty-11.0.txt | 483 + licenses/LICENSE-jetty-9.4.txt | 414 + licenses/LICENSE-jetty-jakarta-servlet-api.txt | 843 + licenses/LICENSE-lombok.txt | 104 + licenses/LICENSE-lucene.txt | 507 + licenses/LICENSE-netty-tcnative-boringssl.txt | 973 ++ licenses/LICENSE-netty.txt | 1484 ++ LICENSE => licenses/LICENSE-pekko-actor.txt | 151 +- LICENSE => licenses/LICENSE-pekko-cluster.txt | 43 +- LICENSE => licenses/LICENSE-pekko-protobuf-v3.txt | 42 +- LICENSE => licenses/LICENSE-pekko-remote.txt | 55 +- licenses/LICENSE-postgresql.txt | 145 + licenses/LICENSE-slf4j.txt | 24 + licenses/LICENSE-threeten-extra.txt | 29 + project/AddMetaInfLicenseFiles.scala | 59 +- project/JdkOptions.scala | 57 + project/plugins.sbt | 5 + .../operator-requirements.txt => sql/changelog.xml | 27 +- sql/docker-compose.yml | 49 + workflow-compiling-service/LICENSE-binary | 625 + workflow-compiling-service/NOTICE-binary | 1873 +++ workflow-compiling-service/build.sbt | 11 + .../texera/service/WorkflowCompilingService.scala | 23 +- .../resource/WorkflowCompilationResource.scala | 15 +- .../amber/compiler/WorkflowCompilerSpec.scala | 317 + .../service/WorkflowCompilingServiceRunSpec.scala | 48 + .../resource/WorkflowCompilationResourceSpec.scala | 257 +- 1122 files changed, 101436 insertions(+), 22967 deletions(-) diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/ControllerProcessor.scala index 5c5a29755a,ef33174b6b..cc2e655eca --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/ControllerProcessor.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/ControllerProcessor.scala @@@ -44,11 -44,9 +44,10 @@@ class ControllerProcessor val workflowScheduler: WorkflowScheduler = new WorkflowScheduler(workflowContext, actorId) val workflowExecutionCoordinator: WorkflowExecutionCoordinator = new WorkflowExecutionCoordinator( - () => this.workflowScheduler.getNextRegions, workflowExecution, controllerConfig, - asyncRPCClient + asyncRPCClient, + workflowContext.executionId ) private val initializer = new ControllerAsyncRPCHandlerInitializer(this) diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala index 19ef60c91c,43e8d281ce..2ce1f21513 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala @@@ -224,17 -174,12 +224,17 @@@ class CostBasedScheduleGenerator // Allocate an URI for each of these output ports val outputPortConfigs: Map[GlobalPortIdentity, OutputPortConfig] = outputPortIdsNeedingStorage.map { gpid => - val portBaseURI = createPortBaseURI( - workflowId = workflowContext.workflowId, - executionId = workflowContext.executionId, - globalPortId = gpid + val outputConfig = planningHints.outputPortConfigOverrides.getOrElse( + gpid, + OutputPortConfig( - createResultURI( ++ createPortBaseURI( + workflowId = workflowContext.workflowId, + executionId = workflowContext.executionId, + globalPortId = gpid + ) + ) ) - gpid -> OutputPortConfig(portBaseURI) + gpid -> outputConfig }.toMap val resourceConfig = ResourceConfig(portConfigs = outputPortConfigs) diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala index cb5d3606f3,5a9df11b58..aa1ff1fd43 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala @@@ -104,14 -92,16 +105,15 @@@ import scala.concurrent.duration.{Durat */ class RegionExecutionCoordinator( region: Region, + isRestart: Boolean, workflowExecution: WorkflowExecution, + executionId: org.apache.texera.amber.core.virtualidentity.ExecutionIdentity, asyncRPCClient: AsyncRPCClient, controllerConfig: ControllerConfig, - actorService: AkkaActorService, - actorRefService: AkkaActorRefMappingService + actorService: PekkoActorService, + actorRefService: PekkoActorRefMappingService ) extends AmberLogging { - initRegionExecution() - private sealed trait RegionExecutionPhase private case object Unexecuted extends RegionExecutionPhase private case object ExecutingDependeePortsPhase extends RegionExecutionPhase @@@ -122,67 -111,14 +124,71 @@@ private val currentPhaseRef: AtomicReference[RegionExecutionPhase] = new AtomicReference( Unexecuted ) + private val terminationFutureRef: AtomicReference[Future[Unit]] = new AtomicReference(null) + private val killRetryTimer: Timer = new JavaTimer(true) + private val killRetryDelay: TwitterDuration = TwitterDuration.fromMilliseconds(200) + if (region.cached) { + completeCachedRegion() + } else { + initRegionExecution() + } + + /** + * Short-circuit a cached region by recording operator metrics and output URIs without workers, + * then emit stats and mark the region as completed. + */ + private def completeCachedRegion(): Unit = { + val regionExecution = workflowExecution.getRegionExecution(region.id) + val resourceConfig = region.resourceConfig.getOrElse(ResourceConfig()) + region.getOperators.foreach { op => + val opExecution = regionExecution.initOperatorExecution(op.id) + // Cached regions do not create workers; synthesize operator-level metrics instead. + val outputMetrics = resourceConfig.portConfigs + .collect { + case (gpid, cfg: OutputPortConfig) if gpid.opId == op.id => + // Emit metrics only for configured output ports in this cached region. + // Use -1 to preserve unknown cached counts in UI/stats instead of reporting 0. + val count = cfg.cachedTupleCount.getOrElse(-1L) + PortTupleMetricsMapping(gpid.portId, TupleMetrics(count, 0L)) + } + .toSeq + val inputMetrics = op.inputPorts.keys + // Use -1 to signal skipped/unknown input counts for cached operators. + .map(pid => PortTupleMetricsMapping(pid, TupleMetrics(-1L, -1L))) + .toSeq + val stats = OperatorMetrics( + WorkflowAggregatedState.COMPLETED_FROM_CACHE, + OperatorStatistics( + inputMetrics, + outputMetrics + ) + ) + opExecution.setCachedMetrics(stats) + } + recordCachedOutputPortResults(resourceConfig) + asyncRPCClient.sendToClient( + ExecutionStatsUpdate(workflowExecution.getAllRegionExecutionsStats) + ) + setPhase(CompletedFromCache) + } + + private def recordCachedOutputPortResults(resourceConfig: ResourceConfig): Unit = { + resourceConfig.portConfigs.collect { + case (gpid, cfg: OutputPortConfig) => + val storageUri = cfg.storageURI + WorkflowExecutionsResource.insertOperatorPortResultUri( + eid = executionId, + globalPortId = gpid, + uri = storageUri + ) + } + } + /** * Sync the status of `RegionExecution` and transition this coordinator's phase to `Completed` only when the - * coordinator is currently in `ExecutingNonDependeePortsPhase` and all the ports of this region are completed. + * coordinator is currently in `ExecutingNonDependeePortsPhase`, all the ports of this region are completed, and + * all workers in this region are terminated. * * Additionally, this method will also terminate all the workers of this region: * @@@ -261,11 -211,30 +281,33 @@@ } } + private def terminateWorkersWithRetry( + regionExecution: RegionExecution, + attempt: Int = 1 + ): Future[Unit] = { + terminateWorkers(regionExecution).rescue { + case err => + logger.warn( + s"Failed to terminate region ${region.id.id} on attempt $attempt. Retrying in ${killRetryDelay.inMilliseconds} ms.", + err + ) + Future + .sleep(killRetryDelay)(killRetryTimer) + .flatMap(_ => terminateWorkersWithRetry(regionExecution, attempt + 1)) + } + } + - def isCompleted: Boolean = currentPhaseRef.get == Completed + def isCompleted: Boolean = { + val phase = currentPhaseRef.get + phase == Completed || phase == CompletedFromCache + } + /** + * Returns the region termination future if termination has been initiated. + * This is only set by `tryCompleteRegionExecution()`. + */ + def getTerminationFutureOpt: Option[Future[Unit]] = Option(terminationFutureRef.get) + /** * This will sync and transition the region execution phase from one to another depending on its current phase: * @@@ -507,8 -465,8 +549,8 @@@ if gid == GlobalPortIdentity( opId = physicalOp.id, portId = outputPortId - ) => + ) && cfg.materialize => - cfg.storageURI.toString + cfg.storageURIBase.toString } .getOrElse("") Some( diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala index 1b6074e51e,deb753beb3..161a116960 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala @@@ -23,13 -23,11 +23,15 @@@ import com.twitter.util.Futur import com.typesafe.scalalogging.LazyLogging import org.apache.texera.amber.core.workflow.{GlobalPortIdentity, PhysicalLink} import org.apache.texera.amber.engine.architecture.common.{ - AkkaActorRefMappingService, - AkkaActorService + PekkoActorRefMappingService, + PekkoActorService } + import org.apache.texera.amber.engine.architecture.controller.ControllerConfig + import org.apache.texera.amber.engine.architecture.controller.ExecutionStateUpdate +import org.apache.texera.amber.engine.architecture.controller.{ + ControllerConfig, + ExecutionStateUpdate +} import org.apache.texera.amber.engine.architecture.controller.execution.WorkflowExecution import org.apache.texera.amber.engine.common.rpc.AsyncRPCClient @@@ -36,13 -35,13 +39,14 @@@ import java.util.concurrent.atomic.Atom import scala.collection.mutable class WorkflowExecutionCoordinator( - getNextRegions: () => Set[Region], workflowExecution: WorkflowExecution, controllerConfig: ControllerConfig, - asyncRPCClient: AsyncRPCClient + asyncRPCClient: AsyncRPCClient, + executionId: org.apache.texera.amber.core.virtualidentity.ExecutionIdentity ) extends LazyLogging { + var schedule: Schedule = Schedule(Map.empty) + private val executedRegions: mutable.ListBuffer[Set[Region]] = mutable.ListBuffer() private val regionExecutionCoordinators diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/config/PortConfig.scala index cac339927d,56743ae095..f2ddc3b8c0 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/config/PortConfig.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/config/PortConfig.scala @@@ -32,19 -32,12 +32,19 @@@ sealed trait PortConfig } /** - * An output port requires exactly one materialization base URI. Result and - * state URIs hang off it via `VFSURIFactory.resultURI` / `stateURI`; this - * field is *not* a URI you can pass straight to `DocumentFactory`. + * Output port configuration for scheduling/runtime. + * - * @param storageURI URI bound at planning time for this output port. ++ * @param storageURIBase URI base bound at planning time for this output port. + * @param cachedTupleCount Optional cached tuple count for UI/metrics when serving from cache. + * @param materialize When false, this output is reuse-only (cache-hit) and must not be freshly materialized. */ -final case class OutputPortConfig(storageURIBase: URI) extends PortConfig { +final case class OutputPortConfig( - storageURI: URI, ++ storageURIBase: URI, + cachedTupleCount: Option[Long] = None, + materialize: Boolean = true +) + extends PortConfig { - override val storageURIs: List[URI] = List(storageURI) + override val storageURIs: List[URI] = List(storageURIBase) } /** diff --cc frontend/src/app/workspace/component/left-panel/left-panel.component.ts index 3a6db75604,ab5e32684b..ec6ab56376 --- a/frontend/src/app/workspace/component/left-panel/left-panel.component.ts +++ b/frontend/src/app/workspace/component/left-panel/left-panel.component.ts @@@ -19,11 -19,10 +19,11 @@@ import { AfterViewInit, Component, ElementRef, HostListener, OnDestroy, OnInit, Type, ViewChild } from "@angular/core"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; - import { NzResizeEvent } from "ng-zorro-antd/resizable"; - import { CdkDragDrop, moveItemInArray } from "@angular/cdk/drag-drop"; + import { NzResizeEvent, NzResizableDirective, NzResizeHandlesComponent } from "ng-zorro-antd/resizable"; + import { CdkDragDrop, moveItemInArray, CdkDropList, CdkDrag, CdkDragHandle } from "@angular/cdk/drag-drop"; import { OperatorMenuComponent } from "./operator-menu/operator-menu.component"; import { VersionsListComponent } from "./versions-list/versions-list.component"; +import { CachePanelComponent } from "./cache-panel/cache-panel.component"; import { WorkflowExecutionHistoryComponent } from "../../../dashboard/component/user/user-workflow/ngbd-modal-workflow-executions/workflow-execution-history.component"; import { TimeTravelComponent } from "./time-travel/time-travel.component"; import { SettingsComponent } from "./settings/settings.component"; diff --cc frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.html index c55c352321,4465d65cb2..32c5c9a9b4 --- a/frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.html +++ b/frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.html @@@ -123,36 -123,10 +123,36 @@@ nzTheme="twotone"></span >remove reusing result </li> + <li + nz-menu-item + *ngIf="operatorMenuService.highlightedOperators.value.length === 1 && + operatorMenuService.highlightedCommentBoxes.value.length === 0 && + !hasHighlightedLinks() && + isWorkflowModifiable" + (click)="clearCacheForSelectedOperator()"> + <span + nz-icon + nzType="database" + nzTheme="outline"></span + >clear cache + </li> + <li + nz-menu-item - *ngIf="operatorMenuService.highlightedOperators.value.length === 1 && - operatorMenuService.highlightedCommentBoxes.value.length === 0 && ++ *ngIf="highlightedOperators.value.length === 1 && ++ highlightedCommentBoxes.value.length === 0 && + !hasHighlightedLinks() && + isWorkflowModifiable" + (click)="clearCacheUpToSelectedOperator()"> + <span + nz-icon + nzType="database" + nzTheme="twotone"></span + >clear cache up to this operator + </li> <li nz-menu-item - *ngIf="(operatorMenuService.highlightedOperators.value.length > 0 || - operatorMenuService.highlightedCommentBoxes.value.length > 0) && + *ngIf="(highlightedOperatorIds.length > 0 || + highlightedCommentBoxIds.length > 0) && isWorkflowModifiable" (click)="onDelete()"> <span diff --cc frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.ts index b26f41d66a,019f77f7af..48dea8db00 --- a/frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.ts +++ b/frontend/src/app/workspace/component/workflow-editor/context-menu/context-menu/context-menu.component.ts @@@ -27,8 -27,10 +27,12 @@@ import { NzModalService } from "ng-zorr import { ResultExportationComponent } from "../../../result-exportation/result-exportation.component"; import { ValidationWorkflowService } from "src/app/workspace/service/validation/validation-workflow.service"; import { GuiConfigService } from "../../../../../common/service/gui-config.service"; + import { NzMenuDirective, NzMenuItemComponent } from "ng-zorro-antd/menu"; + import { NgIf } from "@angular/common"; + import { ɵNzTransitionPatchDirective } from "ng-zorro-antd/core/transition-patch"; + import { NzIconDirective } from "ng-zorro-antd/icon"; +import { WorkflowExecutionsService } from "src/app/dashboard/service/user/workflow-executions/workflow-executions.service"; +import { WorkflowCacheEntriesService } from "src/app/workspace/service/workflow-status/workflow-cache-entries.service"; @UntilDestroy() @Component({ @@@ -46,11 -51,15 +53,17 @@@ export class ContextMenuComponent protected config: GuiConfigService, private workflowResultService: WorkflowResultService, private modalService: NzModalService, - private validationWorkflowService: ValidationWorkflowService + private validationWorkflowService: ValidationWorkflowService, + private workflowExecutionsService: WorkflowExecutionsService, + private cacheEntriesService: WorkflowCacheEntriesService ) { this.registerWorkflowModifiableChangedHandler(); + this.operatorMenuService.highlightedOperators$ + .pipe(untilDestroyed(this)) + .subscribe(ids => (this.highlightedOperatorIds = ids)); + this.operatorMenuService.highlightedCommentBoxes$ + .pipe(untilDestroyed(this)) + .subscribe(ids => (this.highlightedCommentBoxIds = ids)); } public canExecuteOperator(): boolean { diff --cc frontend/src/app/workspace/component/workflow-editor/workflow-editor.component.ts index 3aa4d0c5c3,54e9ec9a4e..39601d9347 --- a/frontend/src/app/workspace/component/workflow-editor/workflow-editor.component.ts +++ b/frontend/src/app/workspace/component/workflow-editor/workflow-editor.component.ts @@@ -27,13 -27,10 +27,13 @@@ import { ExecuteWorkflowService } from import { fromJointPaperEvent, JointUIService, linkPathStrokeColor } from "../../service/joint-ui/joint-ui.service"; import { ValidationWorkflowService } from "../../service/validation/validation-workflow.service"; import { WorkflowActionService } from "../../service/workflow-graph/model/workflow-action.service"; +import { CacheUsageService } from "../../service/workflow-status/cache-usage.service"; +import { WorkflowCacheEntriesService } from "../../service/workflow-status/workflow-cache-entries.service"; import { WorkflowStatusService } from "../../service/workflow-status/workflow-status.service"; import { ExecutionState, OperatorState } from "../../types/execute-workflow.interface"; - import { LogicalPort, OperatorLink } from "../../types/workflow-common.interface"; + import { LogicalPort, OperatorLink, OperatorPredicate } from "../../types/workflow-common.interface"; +import { WorkflowCacheEntry } from "../../../dashboard/type/workflow-cache-entry"; - import { auditTime, filter, map, takeUntil } from "rxjs/operators"; + import { auditTime, filter, map, takeUntil, withLatestFrom } from "rxjs/operators"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import { UndoRedoService } from "../../service/undo-redo/undo-redo.service"; import { WorkflowVersionService } from "../../../dashboard/service/user/workflow-version/workflow-version.service"; @@@ -97,8 -100,16 +103,17 @@@ export class WorkflowEditorComponent im private currentOpenedOperatorID: string | null = null; private removeButton!: new () => joint.linkTools.Button; private breakpointButton!: new () => joint.linkTools.Button; + private cachedEntries: ReadonlyArray<WorkflowCacheEntry> = []; + // Chat popover state (operator chat button) + public chatPopoverOperator: { + operatorId: string; + displayName: string; + position: { x: number; y: number }; + } | null = null; + + // Cached agent result summaries for port label display + constructor( private workflowActionService: WorkflowActionService, private dynamicSchemaService: DynamicSchemaService, @@@ -192,10 -188,9 +217,11 @@@ this.handlePortHighlightEvent(); this.registerPortDisplayNameChangeHandler(); this.handleOperatorStatisticsUpdate(); + this.handleCacheUsageUpdate(); + this.handleCacheEntriesUpdate(); this.handleRegionEvents(); this.handleOperatorSuggestionHighlightEvent(); + this.handleAgentHoverHighlight(); this.handleElementDelete(); this.handleElementSelectAll(); this.handleElementCopy(); @@@ -480,14 -396,18 +509,22 @@@ const ops = region.map(id => this.paper.getModelById(id)); this.paper.model.addCell(element); this.updateRegionElement(element, ops); + // Apply visibility state + if (regionsVisible) { + element.attr("body/visibility", "visible"); + } return { regionElement: element, operators: ops }; }); + // regions are recreated on every update, so reapply the current toggle state to the new elements + this.setRegionsVisibility(this.wrapper.getRegionsDisplayed()); }); + // apply the View > Regions toggle to all existing region elements (canvas and mini-map share the model) + this.wrapper + .getRegionsDisplayedStream() + .pipe(untilDestroyed(this)) + .subscribe(displayed => this.setRegionsVisibility(displayed)); + this.paper.model.on("change:position", operator => { regionMap .filter(region => region.operators.includes(operator)) diff --cc frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts index 0d65759333,d069a270eb..0320e971fa --- a/frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts +++ b/frontend/src/app/workspace/service/joint-ui/joint-ui.service.ts @@@ -238,10 -301,11 +301,12 @@@ export class JointUIService width: JointUIService.DEFAULT_OPERATOR_WIDTH, height: JointUIService.DEFAULT_OPERATOR_HEIGHT, }, + portLabelMarkup: JointUIService.getCustomPortLabelMarkup(), attrs: JointUIService.getCustomOperatorStyleAttrs( operator, - operator.customDisplayName ?? operatorSchema.additionalMetadata.userFriendlyName, + JointUIService.truncateOperatorDisplayName( + operator.customDisplayName ?? operatorSchema.additionalMetadata.userFriendlyName + ), operatorSchema.operatorType, operatorSchema.additionalMetadata.userFriendlyName ), @@@ -346,37 -387,17 +411,34 @@@ const inputMetrics = statistics.inputPortMetrics; const outputMetrics = statistics.outputPortMetrics; + // Cached operators show "-" for inputs and non-materialized outputs, and label workers as "from cache". + const isSkippedFromCache = statistics.operatorState === OperatorState.CompletedFromCache; const workerCount = statistics.numWorkers ?? 1; - element.attr(`.${operatorWorkerCountClass}/text`, "#workers: " + String(workerCount)); + const workerCountLabel = isSkippedFromCache ? "from cache" : "#workers: " + String(workerCount); + element.attr(`.${operatorWorkerCountClass}/text`, workerCountLabel); - element.attr( - `.${operatorStatusTextClass}/text`, - "status: " + JointUIService.getStatusDisplayText(statistics.operatorState) - ); - inPorts.forEach(portDef => { const portId = portDef.id; if (portId != null) { const parts = portId.split("-"); const numericSuffix = parts.length > 1 ? parts[1] : portId; + const count: number = inputMetrics[numericSuffix] ?? 0; + element.portProp(portId, "attrs/.port-label/text", count.toLocaleString()); + + const count = inputMetrics[numericSuffix]; + const rawAttrs = (portDef.attrs as any) || {}; + const oldText: string = (rawAttrs[".port-label"] && rawAttrs[".port-label"].text) || ""; + let originalName = oldText.includes(":") ? oldText.split(":", 1)[0].trim() : oldText; + + if (!originalName) { + originalName = portId; + } + + // Negative counts mark skipped/unknown inputs from cached sub-operators. + const isUnknownCount = count !== undefined && count < 0; + const labelText = isSkippedFromCache || isUnknownCount ? "-" : (count ?? 0).toLocaleString(); + element.portProp(portId, "attrs/.port-label/text", labelText); } }); @@@ -385,77 -406,12 +447,77 @@@ if (portId != null) { const parts = portId.split("-"); const numericSuffix = parts.length > 1 ? parts[1] : portId; - const count: number = outputMetrics[numericSuffix] ?? 0; + element.portProp(portId, "attrs/.port-label/text", count.toLocaleString()); + const rawAttrs = (portDef.attrs as any) || {}; + const oldText: string = (rawAttrs[".port-label"] && rawAttrs[".port-label"].text) || ""; + let originalName = oldText.includes(":") ? oldText.split(":", 1)[0].trim() : oldText; + + if (!originalName) { + originalName = portId; + } + + const baseLabel = isSkippedFromCache && count === undefined ? "-" : (count ?? 0).toLocaleString(); + element.portProp(portId, "attrs/.port-label/text", baseLabel); } }); + const effectiveCacheLabels = isSkippedFromCache ? cachePortLabels : undefined; + this.changeOperatorCacheLabels(jointPaper, operatorID, effectiveCacheLabels); this.changeOperatorState(jointPaper, operatorID, statistics.operatorState); } + + /** + * Updates cache usage labels for output ports without changing counts or operator state. + */ + public changeOperatorCacheLabels( + jointPaper: joint.dia.Paper, + operatorID: string, + cachePortLabels?: Record<string, string> + ): void { + const element = jointPaper.getModelById(operatorID) as joint.shapes.devs.Model; + if (!element) { + return; + } + const outPorts = element.getPorts().filter(p => p.group === "out"); + outPorts.forEach(portDef => { + const portId = portDef.id; + if (portId != null) { + const parts = portId.split("-"); + const numericSuffix = parts.length > 1 ? parts[1] : portId; + const cacheLabel = cachePortLabels?.[numericSuffix] ?? ""; + element.portProp(portId, "attrs/.port-cache-label/text", cacheLabel); + element.portProp( + portId, + "attrs/.port-cache-label/transform", + cacheLabel ? "translate(0, 12)" : "" + ); + } + }); + } + + /** + * Updates cached output port indicator badges without changing counts or labels. + */ + public changeOperatorCachedPorts( + jointPaper: joint.dia.Paper, + operatorID: string, + cachedPortIds?: Set<string> + ): void { + const element = jointPaper.getModelById(operatorID) as joint.shapes.devs.Model; + if (!element) { + return; + } + const outPorts = element.getPorts().filter(p => p.group === "out"); + outPorts.forEach(portDef => { + const portId = portDef.id; + if (portId != null) { + const parts = portId.split("-"); + const numericSuffix = parts.length > 1 ? parts[1] : portId; + const isCached = cachedPortIds?.has(numericSuffix) ?? false; + element.portProp(portId, "attrs/.port-cache-indicator/display", isCached ? "block" : "none"); + } + }); + } public foldOperatorDetails(jointPaper: joint.dia.Paper, operatorID: string): void { jointPaper.getModelById(operatorID).attr({ [`.${operatorStateClass}`]: { visibility: "hidden" },
