This is an automated email from the ASF dual-hosted git repository. Xiao-zhen-Liu pushed a commit to branch xiaozhen-caching-prototype in repository https://gitbox.apache.org/repos/asf/texera.git
commit d03494cd505f46e2de7861c9878fd6e0cfb2ccf1 Merge: bdaa01f8e2 51fb0b8acb Author: Xiaozhen Liu <[email protected]> AuthorDate: Mon Dec 8 18:48:08 2025 -0800 Merge branch 'refs/heads/main' into xiaozhen-caching-prototype # Conflicts: # amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/promisehandlers/PortCompletedHandler.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostEstimator.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala # amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala # amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala .github/release/vote-email-template.md | 85 ++++ .github/workflows/build-and-push-images.yml | 41 +- .github/workflows/check-header.yml | 2 +- .github/workflows/create-release-candidate.yml | 340 +++++++++++++ .github/workflows/github-action-build.yml | 10 +- .github/workflows/lint-pr.yml | 2 +- .licenserc.yaml | 1 + .run/AccessControlService.run.xml | 34 ++ .run/ComputingUnitManagingService.run.xml | 34 ++ .run/ComputingUnitMaster.run.xml | 34 ++ .run/ComputingUnitWorker.run.xml | 34 ++ .run/ConfigService.run.xml | 34 ++ .run/FileService.run.xml | 34 ++ .run/TexeraWebApplication.run.xml | 34 ++ .run/WorkflowCompilingService.run.xml | 34 ++ .run/frontend.run.xml | 30 ++ .run/texera micro services.run.xml | 31 ++ .run/texera-lakefs.run.xml | 28 ++ .../texera/service/AccessControlService.scala | 11 +- .../service/resource/AccessControlResource.scala | 128 ++++- amber/operator-requirements.txt | 1 + amber/requirements.txt | 13 +- .../engine/architecture/rpc/controlcommands.proto | 18 +- .../architecture/rpc/controllerservice.proto | 6 +- .../engine/architecture/rpc/controlreturns.proto | 6 +- .../engine/architecture/rpc/testerservice.proto | 6 +- .../engine/architecture/rpc/workerservice.proto | 6 +- .../architecture/sendsemantics/partitionings.proto | 4 +- .../engine/architecture/worker/statistics.proto | 4 +- .../amber/engine/common/actormessage.proto | 2 +- .../amber/engine/common/ambermessage.proto | 8 +- .../engine/common/executionruntimestate.proto | 12 +- .../handlers/actorcommand/actor_handler_base.py | 2 +- .../handlers/actorcommand/backpressure_handler.py | 6 +- .../handlers/actorcommand/credit_update_handler.py | 2 +- .../handlers/control/add_input_channel_handler.py | 2 +- .../handlers/control/add_partitioning_handler.py | 3 +- .../handlers/control/assign_port_handler.py | 5 +- .../handlers/control/control_handler_base.py | 2 +- .../handlers/control/debug_command_handler.py | 2 +- .../handlers/control/end_channel_handler.py | 5 +- .../handlers/control/end_worker_handler.py | 2 +- .../control/evaluate_expression_handler.py | 3 +- .../control/initialize_executor_handler.py | 5 +- .../handlers/control/no_operation_handler.py | 3 +- .../handlers/control/open_executor_handler.py | 6 +- .../handlers/control/pause_worker_handler.py | 3 +- .../handlers/control/query_statistics_handler.py | 5 +- .../control/replay_current_tuple_handler.py | 8 +- .../handlers/control/resume_worker_handler.py | 3 +- .../handlers/control/start_channel_handler.py | 5 +- .../handlers/control/start_worker_handler.py | 6 +- .../handlers/control/update_executor_handler.py | 2 +- .../managers/console_message_manager.py | 2 +- .../python/core/architecture/managers/context.py | 4 +- .../managers/embedded_control_message_manager.py | 10 +- .../core/architecture/managers/executor_manager.py | 43 +- .../core/architecture/managers/pause_manager.py | 4 +- .../architecture/managers/statistics_manager.py | 4 +- .../architecture/managers/test_executor_manager.py | 174 +++++++ .../architecture/managers/test_pause_manager.py | 2 +- .../architecture/managers/test_state_manager.py | 2 +- .../managers/tuple_processing_manager.py | 2 +- .../core/architecture/packaging/input_manager.py | 5 +- .../core/architecture/packaging/output_manager.py | 6 +- .../core/architecture/rpc/async_rpc_client.py | 7 +- .../core/architecture/rpc/async_rpc_server.py | 6 +- .../sendsemantics/broad_cast_partitioner.py | 6 +- .../hash_based_shuffle_partitioner.py | 6 +- .../sendsemantics/one_to_one_partitioner.py | 6 +- .../core/architecture/sendsemantics/partitioner.py | 6 +- .../range_based_shuffle_partitioner.py | 6 +- .../sendsemantics/round_robin_partitioner.py | 6 +- .../src/main/python/core/models/RTableExecutor.py | 132 ----- .../src/main/python/core/models/RTupleExecutor.py | 167 ------- .../src/main/python/core/models/internal_queue.py | 7 +- amber/src/main/python/core/models/r_utils.py | 79 --- .../main/python/core/models/test_RTableExecutor.py | 539 --------------------- amber/src/main/python/core/models/test_tuple.py | 4 +- amber/src/main/python/core/models/tuple.py | 6 +- .../main/python/core/runnables/data_processor.py | 2 +- amber/src/main/python/core/runnables/main_loop.py | 12 +- .../main/python/core/runnables/network_receiver.py | 4 +- .../main/python/core/runnables/network_sender.py | 6 +- .../python/core/runnables/test_console_message.py | 6 +- .../main/python/core/runnables/test_main_loop.py | 12 +- .../python/core/runnables/test_network_receiver.py | 6 +- .../core/storage/iceberg/test_iceberg_document.py | 36 +- .../input_port_materialization_reader_runnable.py | 6 +- .../src/main/python/core/storage/storage_config.py | 2 +- .../main/python/core/storage/vfs_uri_factory.py | 2 +- .../main/python/core/util/buffer/timed_buffer.py | 2 +- .../core/util/console_message/replace_print.py | 2 +- .../core/util/console_message/timed_buffer.py | 4 +- .../core/util/expression_evaluator/__init__.py | 5 +- .../test_expression_evaluator.py | 5 +- .../python/core/util/virtual_identity/__init__.py | 2 +- amber/src/main/python/proto/__init__.py | 16 - amber/src/main/python/proto/org/__init__.py | 16 - amber/src/main/python/proto/org/apache/__init__.py | 16 - .../main/python/proto/org/apache/amber/__init__.py | 16 - .../proto/org/apache/amber/engine/__init__.py | 16 - .../apache/amber/engine/architecture/__init__.py | 16 - .../python/proto/org/apache/texera/__init__.py | 16 - .../proto/org/apache/texera/amber/__init__.py | 0 .../org/apache/{ => texera}/amber/core/__init__.py | 19 +- .../org/apache/texera/amber/engine/__init__.py | 0 .../texera/amber/engine/architecture/__init__.py | 0 .../amber/engine/architecture/rpc/__init__.py | 199 ++++---- .../engine/architecture/sendsemantics/__init__.py | 19 +- .../amber/engine/architecture/worker/__init__.py | 19 +- .../{ => texera}/amber/engine/common/__init__.py | 19 +- amber/src/main/python/proto/scalapb/__init__.py | 17 - amber/src/main/python/pyproject.toml | 20 +- .../pytexera/storage/dataset_file_document.py | 3 +- .../amber/clustering/ClusterListener.scala | 18 +- .../amber/clustering/SingleNodeListener.scala | 4 +- .../common/AkkaActorRefMappingService.scala | 12 +- .../architecture/common/AkkaActorService.scala | 6 +- .../common/AkkaMessageTransferService.scala | 14 +- .../architecture/common/AmberProcessor.scala | 20 +- .../architecture/common/ExecutorDeployment.scala | 18 +- .../architecture/common/ProcessingStepCursor.scala | 6 +- .../engine/architecture/common/WorkflowActor.scala | 18 +- .../architecture/controller/ClientEvent.scala | 12 +- .../architecture/controller/Controller.scala | 26 +- .../ControllerAsyncRPCHandlerInitializer.scala | 14 +- .../controller/ControllerProcessor.scala | 18 +- .../controller/ControllerTimerService.scala | 12 +- .../controller/GlobalReplayManager.scala | 4 +- .../engine/architecture/controller/Workflow.scala | 4 +- .../controller/WorkflowScheduler.scala | 8 +- .../controller/execution/ChannelExecution.scala | 2 +- .../controller/execution/ExecutionUtils.scala | 11 +- .../controller/execution/LinkExecution.scala | 4 +- .../controller/execution/OperatorExecution.scala | 22 +- .../controller/execution/RegionExecution.scala | 14 +- .../controller/execution/WorkerPortExecution.scala | 2 +- .../controller/execution/WorkflowExecution.scala | 16 +- .../promisehandlers/ConsoleMessageHandler.scala | 8 +- .../promisehandlers/DebugCommandHandler.scala | 10 +- .../EmbeddedControlMessageHandler.scala | 14 +- .../EvaluatePythonExpressionHandler.scala | 10 +- .../promisehandlers/LinkWorkersHandler.scala | 8 +- .../controller/promisehandlers/PauseHandler.scala | 18 +- .../promisehandlers/PortCompletedHandler.scala | 20 +- .../QueryWorkerStatisticsHandler.scala | 19 +- .../controller/promisehandlers/ResumeHandler.scala | 13 +- .../RetrieveWorkflowStateHandler.scala | 16 +- .../promisehandlers/RetryWorkflowHandler.scala | 10 +- .../promisehandlers/StartWorkflowHandler.scala | 13 +- .../TakeGlobalCheckpointHandler.scala | 20 +- .../WorkerExecutionCompletedHandler.scala | 10 +- .../WorkerStateUpdatedHandler.scala | 10 +- .../architecture/deploysemantics/AddressInfo.scala | 2 +- .../deploystrategy/DeployStrategy.scala | 2 +- .../deploysemantics/deploystrategy/OneOnEach.scala | 2 +- .../deploystrategy/RandomDeployment.scala | 2 +- .../deploystrategy/RoundRobinDeployment.scala | 2 +- .../deploysemantics/layer/WorkerExecution.scala | 10 +- .../logreplay/AsyncReplayLogWriter.scala | 10 +- .../architecture/logreplay/EmptyReplayLogger.scala | 9 +- .../architecture/logreplay/OrderEnforcer.scala | 4 +- .../logreplay/ReplayLogGenerator.scala | 8 +- .../architecture/logreplay/ReplayLogManager.scala | 19 +- .../architecture/logreplay/ReplayLogger.scala | 9 +- .../architecture/logreplay/ReplayLoggerImpl.scala | 11 +- .../logreplay/ReplayOrderEnforcer.scala | 4 +- .../messaginglayer/AmberFIFOChannel.scala | 12 +- .../messaginglayer/CongestionControl.scala | 4 +- .../messaginglayer/DeadLetterMonitorActor.scala | 4 +- .../architecture/messaginglayer/FlowControl.scala | 8 +- .../architecture/messaginglayer/InputGateway.scala | 6 +- .../architecture/messaginglayer/InputManager.scala | 18 +- .../messaginglayer/NetworkInputGateway.scala | 8 +- .../messaginglayer/NetworkOutputGateway.scala | 10 +- .../messaginglayer/OrderingEnforcer.scala | 2 +- .../messaginglayer/OutputManager.scala | 28 +- .../architecture/messaginglayer/WorkerPort.scala | 6 +- .../messaginglayer/WorkerTimerService.scala | 19 +- .../pythonworker/PythonProxyClient.scala | 22 +- .../pythonworker/PythonProxyServer.scala | 20 +- .../pythonworker/PythonWorkflowWorker.scala | 26 +- .../pythonworker/WorkerBatchInternalQueue.scala | 12 +- .../scheduling/CostBasedScheduleGenerator.scala | 21 +- .../architecture/scheduling/CostEstimator.scala | 14 +- .../ExpansionGreedyScheduleGenerator.scala | 14 +- .../engine/architecture/scheduling/Region.scala | 8 +- .../scheduling/RegionExecutionCoordinator.scala | 34 +- .../architecture/scheduling/RegionPlan.scala | 4 +- .../engine/architecture/scheduling/Schedule.scala | 2 +- .../scheduling/ScheduleGenerator.scala | 8 +- .../architecture/scheduling/SchedulingUtils.scala | 2 +- .../scheduling/WorkflowExecutionCoordinator.scala | 15 +- .../scheduling/config/ChannelConfig.scala | 6 +- .../scheduling/config/LinkConfig.scala | 8 +- .../scheduling/config/OperatorConfig.scala | 2 +- .../scheduling/config/PortConfig.scala | 4 +- .../scheduling/config/ResourceConfig.scala | 6 +- .../scheduling/config/WorkerConfig.scala | 10 +- .../resourcePolicies/ExecutionClusterInfo.scala | 2 +- .../resourcePolicies/ResourceAllocator.scala | 22 +- .../partitioners/BroadcastPartitioner.scala | 8 +- .../partitioners/HashBasedShufflePartitioner.scala | 8 +- .../partitioners/OneToOnePartitioner.scala | 8 +- .../sendsemantics/partitioners/Partitioner.scala | 14 +- .../RangeBasedShufflePartitioner.scala | 8 +- .../partitioners/RoundRobinPartitioner.scala | 8 +- .../engine/architecture/worker/DPThread.scala | 25 +- .../engine/architecture/worker/DataProcessor.scala | 42 +- .../DataProcessorRPCHandlerInitializer.scala | 16 +- .../worker/EmbeddedControlMessageManager.scala | 12 +- .../engine/architecture/worker/PauseManager.scala | 8 +- .../engine/architecture/worker/PauseType.scala | 4 +- .../architecture/worker/WorkflowWorker.scala | 29 +- .../InputPortMaterializationReaderThread.scala | 30 +- .../managers/OutputPortResultWriterThread.scala | 6 +- .../worker/managers/SerializationManager.scala | 16 +- .../worker/managers/StatisticsManager.scala | 8 +- .../promisehandlers/AddInputChannelHandler.scala | 14 +- .../promisehandlers/AddPartitioningHandler.scala | 14 +- .../worker/promisehandlers/AssignPortHandler.scala | 23 +- .../worker/promisehandlers/EndChannelHandler.scala | 15 +- .../worker/promisehandlers/EndHandler.scala | 11 +- .../FinalizeCheckpointHandler.scala | 14 +- .../FlushNetworkBufferHandler.scala | 11 +- .../InitializeExecutorHandler.scala | 14 +- .../promisehandlers/OpenExecutorHandler.scala | 11 +- .../worker/promisehandlers/PauseHandler.scala | 20 +- .../promisehandlers/PrepareCheckpointHandler.scala | 16 +- .../promisehandlers/QueryStatisticsHandler.scala | 13 +- .../worker/promisehandlers/ResumeHandler.scala | 16 +- .../promisehandlers/RetrieveStateHandler.scala | 11 +- .../promisehandlers/StartChannelHandler.scala | 17 +- .../worker/promisehandlers/StartHandler.scala | 21 +- .../amber/engine/common/AmberConfig.scala | 2 +- .../amber/engine/common/AmberKryoInitializer.scala | 2 +- .../amber/engine/common/AmberLogging.scala | 6 +- .../amber/engine/common/AmberRuntime.scala | 8 +- .../amber/engine/common/CheckpointState.scala | 2 +- .../amber/engine/common/CheckpointSupport.scala | 6 +- .../amber/engine/common/ElidableStatement.scala | 2 +- .../amber/engine/common/FutureBijection.scala | 2 +- .../amber/engine/common/SerializedState.scala | 2 +- .../{ => texera}/amber/engine/common/Utils.scala | 4 +- .../amberexception/BreakpointException.scala | 4 +- .../engine/common/ambermessage/DataPayload.scala | 6 +- .../ambermessage/DirectControlMessagePayload.scala | 2 +- .../common/ambermessage/RecoveryPayload.scala | 4 +- .../ambermessage/WorkflowFIFOMessagePayload.scala | 2 +- .../common/ambermessage/WorkflowMessage.scala | 4 +- .../amber/engine/common/client/AmberClient.scala | 23 +- .../amber/engine/common/client/ClientActor.scala | 35 +- .../amber/engine/common/rpc/AsyncRPCClient.scala | 24 +- .../common/rpc/AsyncRPCHandlerInitializer.scala | 14 +- .../amber/engine/common/rpc/AsyncRPCServer.scala | 17 +- .../common/statetransition/StateManager.scala | 8 +- .../statetransition/WorkerStateManager.scala | 8 +- .../engine/common/storage/EmptyRecordStorage.scala | 4 +- .../engine/common/storage/HDFSRecordStorage.scala | 4 +- .../common/storage/SequentialRecordStorage.scala | 6 +- .../engine/common/storage/VFSRecordStorage.scala | 4 +- .../amber/engine/common/virtualidentity/util.scala | 4 +- .../{ => texera}/amber/error/ErrorUtils.scala | 12 +- .../apache/texera/web/ComputingUnitMaster.scala | 26 +- .../apache/texera/web/ComputingUnitWorker.scala | 2 +- .../scala/org/apache/texera/web/SessionState.scala | 2 +- .../apache/texera/web/TexeraWebApplication.scala | 6 +- .../texera/web/WorkflowLifecycleManager.scala | 8 +- .../http/response/SchemaPropagationResponse.scala | 2 +- .../websocket/event/PaginatedResultEvent.scala | 2 +- .../model/websocket/event/WorkflowErrorEvent.scala | 2 +- .../event/python/ConsoleUpdateEvent.scala | 2 +- .../request/EditingTimeCompilationRequest.scala | 2 +- .../websocket/request/ModifyLogicRequest.scala | 2 +- .../websocket/request/WorkflowExecuteRequest.scala | 4 +- .../python/PythonExpressionEvaluateResponse.scala | 2 +- .../web/resource/CollaborationResource.scala | 2 +- .../web/resource/SystemMetadataResource.scala | 2 +- .../web/resource/WorkflowWebsocketResource.scala | 12 +- .../resource/aiassistant/AiAssistantManager.scala | 2 +- .../dashboard/DatasetSearchQueryBuilder.scala | 2 +- .../web/resource/dashboard/hub/HubResource.scala | 2 +- .../user/workflow/WorkflowExecutionsResource.scala | 23 +- .../dashboard/user/workflow/WorkflowResource.scala | 7 +- .../user/workflow/WorkflowVersionResource.scala | 2 +- .../web/service/EmailNotificationService.scala | 2 +- .../web/service/ExecutionConsoleService.scala | 30 +- .../service/ExecutionReconfigurationService.scala | 6 +- .../web/service/ExecutionResultService.scala | 32 +- .../web/service/ExecutionRuntimeService.scala | 12 +- .../texera/web/service/ExecutionStatsService.scala | 33 +- .../service/ExecutionsMetadataPersistService.scala | 2 +- .../service/FriesReconfigurationAlgorithm.scala | 8 +- .../texera/web/service/ResultExportService.scala | 14 +- .../texera/web/service/WorkflowEmailNotifier.scala | 4 +- .../web/service/WorkflowExecutionService.scala | 16 +- .../texera/web/service/WorkflowService.scala | 32 +- .../storage/ExecutionReconfigurationStore.scala | 6 +- .../texera/web/storage/ExecutionStateStore.scala | 6 +- .../org/apache/texera/web/storage/StateStore.scala | 2 +- .../texera/web/storage/WorkflowStateStore.scala | 2 +- .../org/apache/texera/workflow/LogicalLink.scala | 4 +- .../org/apache/texera/workflow/LogicalPlan.scala | 8 +- .../apache/texera/workflow/WorkflowCompiler.scala | 6 +- .../GitVersionControlLocalFileStorageSpec.java | 4 +- .../breakpoint/ExceptionBreakpointSpec.scala | 2 +- .../architecture/control/TrivialControlSpec.scala | 22 +- .../architecture/control/utils/ChainHandler.scala | 6 +- .../control/utils/CollectHandler.scala | 6 +- .../architecture/control/utils/ErrorHandler.scala | 6 +- .../control/utils/MultiCallHandler.scala | 8 +- .../architecture/control/utils/NestedHandler.scala | 6 +- .../control/utils/PingPongHandler.scala | 6 +- .../control/utils/RecursionHandler.scala | 6 +- .../utils/TesterAsyncRPCHandlerInitializer.scala | 12 +- .../control/utils/TrivialControlTester.scala | 24 +- .../architecture/controller/ControllerSpec.scala | 4 +- .../messaginglayer/NetworkInputGatewaySpec.scala | 8 +- .../messaginglayer/OutputManagerSpec.scala | 12 +- .../messaginglayer/RangeBasedShuffleSpec.scala | 10 +- .../pythonworker/PythonWorkflowWorkerSpec.scala | 30 +- .../CostBasedScheduleGeneratorSpec.scala | 10 +- .../scheduling/DefaultCostEstimatorSpec.scala | 30 +- .../ExpansionGreedyScheduleGeneratorSpec.scala | 19 +- .../engine/architecture/worker/DPThreadSpec.scala | 31 +- .../architecture/worker/DataProcessorSpec.scala | 34 +- .../engine/architecture/worker/WorkerSpec.scala | 40 +- .../engine/e2e/BatchSizePropagationSpec.scala | 18 +- .../amber/engine/e2e/DataProcessingSpec.scala | 30 +- .../{ => texera}/amber/engine/e2e/PauseSpec.scala | 23 +- .../{ => texera}/amber/engine/e2e/TestUtils.scala | 10 +- .../engine/faulttolerance/CheckpointSpec.scala | 25 +- .../amber/engine/faulttolerance/LoggingSpec.scala | 28 +- .../amber/engine/faulttolerance/ReplaySpec.scala | 25 +- .../workflow/WorkflowVersionResourceSpec.scala | 2 +- .../web/service/ExecutionConsoleServiceSpec.scala | 7 +- .../web/service/ExecutionResultServiceSpec.scala | 2 +- .../storage/ReadonlyLocalFileDocumentSpec.scala | 6 +- bin/computing-unit-master.dockerfile | 58 +-- bin/computing-unit-worker.dockerfile | 66 +-- bin/fix-format.sh | 8 +- bin/k8s/values.yaml | 16 +- .../litellm-config.yaml | 25 +- bin/python-proto-gen.sh | 2 +- bin/single-node/.env | 11 +- bin/single-node/docker-compose.yml | 101 ++-- bin/single-node/nginx.conf | 12 + build.sbt | 2 +- common/config/src/main/resources/cluster.conf | 2 +- common/config/src/main/resources/gui.conf | 4 + .../config/src/main/resources/llm.conf | 12 +- .../{ => texera}/amber/config/AkkaConfig.scala | 2 +- .../amber/config/ApplicationConfig.scala | 2 +- .../amber/config/EnvironmentalVariable.scala | 2 +- .../{ => texera}/amber/config/StorageConfig.scala | 4 +- .../{ => texera}/amber/config/UdfConfig.scala | 2 +- .../{ => texera}/amber/util/ConfigParserUtil.scala | 2 +- .../scala/org/apache/texera/config/GuiConfig.scala | 2 + .../config/LLMConfig.scala} | 13 +- common/workflow-core/build.sbt | 13 +- .../apache/{ => texera}/amber/core/executor.proto | 4 +- .../{ => texera}/amber/core/virtualidentity.proto | 2 +- .../apache/{ => texera}/amber/core/workflow.proto | 4 +- .../amber/core/workflowruntimestate.proto | 2 +- .../amber/core/WorkflowRuntimeException.scala | 4 +- .../amber/core/executor/ExecFactory.scala | 2 +- .../core/executor/JavaRuntimeCompilation.scala | 4 +- .../amber/core/executor/OperatorExecutor.scala | 8 +- .../core/executor/SourceOperatorExecutor.scala | 6 +- .../{ => texera}/amber/core/state/State.scala | 4 +- .../amber/core/storage/DocumentFactory.scala | 20 +- .../amber/core/storage/FileResolver.scala | 2 +- .../core/storage/IcebergCatalogInstance.scala | 6 +- .../amber/core/storage/VFSURIFactory.scala | 14 +- .../core/storage/model/BufferedItemWriter.scala | 2 +- .../core/storage/model/DatasetFileDocument.scala | 10 +- .../amber/core/storage/model/OnDataset.scala | 2 +- .../storage/model/ReadonlyLocalFileDocument.scala | 2 +- .../storage/model/ReadonlyVirtualDocument.scala | 2 +- .../core/storage/model/VirtualCollection.scala | 2 +- .../amber/core/storage/model/VirtualDocument.scala | 2 +- .../amber/core/storage/result/ResultSchema.scala | 4 +- .../core/storage/result/WorkflowResultStore.scala | 4 +- .../storage/result/iceberg/IcebergDocument.scala | 10 +- .../result/iceberg/IcebergTableWriter.scala | 8 +- .../core/storage/result/iceberg/OnIceberg.scala | 4 +- .../core/storage/util/LakeFSStorageClient.scala | 4 +- .../amber/core/storage/util/StorageUtil.scala | 2 +- .../dataset/GitVersionControlLocalFileStorage.java | 2 +- .../storage/util/dataset/JGitVersionControl.java | 2 +- .../storage/util/dataset/PhysicalFileNode.java | 2 +- .../{ => texera}/amber/core/tuple/Attribute.java | 2 +- .../amber/core/tuple/AttributeType.java | 5 +- .../amber/core/tuple/AttributeTypeUtils.scala | 23 +- .../texera/amber/core/tuple/LargeBinary.java | 109 +++++ .../{ => texera}/amber/core/tuple/Schema.scala | 2 +- .../{ => texera}/amber/core/tuple/Tuple.scala | 4 +- .../{ => texera}/amber/core/tuple/TupleLike.scala | 4 +- .../{ => texera}/amber/core/tuple/TupleUtils.scala | 8 +- .../amber/core/workflow/LocationPreference.scala | 2 +- .../amber/core/workflow/PartitionInfo.scala | 2 +- .../amber/core/workflow/PhysicalOp.scala | 8 +- .../amber/core/workflow/PhysicalPlan.scala | 8 +- .../amber/core/workflow/WorkflowContext.scala | 6 +- .../amber/core/workflow/WorkflowSettings.scala | 2 +- .../{ => texera}/amber/util/ArrowUtils.scala | 6 +- .../{ => texera}/amber/util/IcebergUtil.scala | 96 +++- .../apache/{ => texera}/amber/util/JSONUtils.scala | 6 +- .../amber/util/VirtualIdentityUtils.scala | 4 +- .../amber/util/serde/GlobalPortIdentitySerde.scala | 6 +- .../util/serde/PortIdentityKeyDeserializer.scala | 4 +- .../util/serde/PortIdentityKeySerializer.scala | 6 +- .../service/util/LargeBinaryInputStream.scala | 82 ++++ .../texera/service/util/LargeBinaryManager.scala | 66 +++ .../service/util/LargeBinaryOutputStream.scala | 121 +++++ .../texera/service/util/S3StorageClient.scala | 122 ++++- .../core/storage/model/VirtualDocumentSpec.scala | 2 +- .../amber/core/tuple/AttributeTypeUtilsSpec.scala | 27 +- .../{ => texera}/amber/core/tuple/SchemaSpec.scala | 2 +- .../{ => texera}/amber/core/tuple/TupleSpec.scala | 4 +- .../amber/storage/FileResolverSpec.scala | 4 +- .../IcebergDocumentConsoleMessagesSpec.scala | 12 +- .../result/iceberg/IcebergDocumentSpec.scala | 14 +- .../result/iceberg/IcebergTableStatsSpec.scala | 14 +- .../{ => texera}/amber/util/IcebergUtilSpec.scala | 105 +++- .../service/util/LargeBinaryInputStreamSpec.scala | 352 ++++++++++++++ .../service/util/LargeBinaryManagerSpec.scala | 471 ++++++++++++++++++ .../service/util/LargeBinaryOutputStreamSpec.scala | 252 ++++++++++ .../texera/service/util/S3StorageClientSpec.scala | 337 +++++++++++++ .../texera/service/util/S3StorageTestBase.scala | 69 +++ .../amber/operator/DummyProperties.scala | 2 +- .../{ => texera}/amber/operator/LogicalOp.scala | 203 ++++---- .../amber/operator/PortDescriptor.scala | 4 +- .../amber/operator/PythonOperatorDescriptor.scala | 10 +- .../amber/operator/SpecialPhysicalOpFactory.scala | 12 +- .../amber/operator/TestOperators.scala | 20 +- .../amber/operator/aggregate/AggregateOpDesc.scala | 25 +- .../amber/operator/aggregate/AggregateOpExec.scala | 8 +- .../operator/aggregate/AggregationFunction.java | 2 +- .../operator/aggregate/AggregationOperation.scala | 6 +- .../aggregate/DistributedAggregation.scala | 4 +- .../cartesianProduct/CartesianProductOpDesc.scala | 18 +- .../cartesianProduct/CartesianProductOpExec.scala | 8 +- .../dictionary/DictionaryMatcherOpDesc.scala | 25 +- .../dictionary/DictionaryMatcherOpExec.scala | 8 +- .../amber/operator/dictionary/MatchingType.java | 2 +- .../operator/difference/DifferenceOpDesc.scala | 14 +- .../operator/difference/DifferenceOpExec.scala | 6 +- .../amber/operator/distinct/DistinctOpDesc.scala | 14 +- .../amber/operator/distinct/DistinctOpExec.scala | 6 +- .../amber/operator/dummy/DummyOpDesc.scala | 8 +- .../amber/operator/filter/ComparisonType.java | 2 +- .../amber/operator/filter/FilterOpDesc.scala | 8 +- .../amber/operator/filter/FilterOpExec.scala | 6 +- .../amber/operator/filter/FilterPredicate.java | 16 +- .../operator/filter/SpecializedFilterOpDesc.scala | 14 +- .../operator/filter/SpecializedFilterOpExec.scala | 6 +- .../amber/operator/flatmap/FlatMapOpDesc.scala | 4 +- .../amber/operator/flatmap/FlatMapOpExec.scala | 6 +- .../operator/hashJoin/HashJoinBuildOpExec.scala | 8 +- .../amber/operator/hashJoin/HashJoinOpDesc.scala | 24 +- .../operator/hashJoin/HashJoinProbeOpExec.scala | 10 +- .../amber/operator/hashJoin/JoinType.java | 2 +- .../HuggingFaceIrisLogisticRegressionOpDesc.scala | 12 +- .../HuggingFaceSentimentAnalysisOpDesc.scala | 12 +- .../HuggingFaceSpamSMSDetectionOpDesc.scala | 12 +- .../HuggingFaceTextSummarizationOpDesc.scala | 12 +- .../amber/operator/ifStatement/IfOpDesc.scala | 16 +- .../amber/operator/ifStatement/IfOpExec.scala | 12 +- .../amber/operator/intersect/IntersectOpDesc.scala | 14 +- .../amber/operator/intersect/IntersectOpExec.scala | 6 +- .../operator/intervalJoin/IntervalJoinOpDesc.scala | 20 +- .../operator/intervalJoin/IntervalJoinOpExec.scala | 12 +- .../operator/intervalJoin/TimeIntervalType.java | 2 +- .../keywordSearch/KeywordSearchOpDesc.scala | 18 +- .../keywordSearch/KeywordSearchOpExec.scala | 8 +- .../amber/operator/limit/LimitOpDesc.scala | 16 +- .../amber/operator/limit/LimitOpExec.scala | 8 +- .../Scorer/MachineLearningScorerOpDesc.scala | 12 +- .../Scorer/classificationMetricsFnc.java | 2 +- .../Scorer/regressionMetricsFnc.java | 2 +- ...SklearnAdvancedKNNClassifierTrainerOpDesc.scala | 4 +- .../KNNTrainer/SklearnAdvancedKNNParameters.java | 4 +- .../SklearnAdvancedKNNRegressorTrainerOpDesc.scala | 4 +- .../SVCTrainer/SklearnAdvancedSVCParameters.java | 4 +- .../SklearnAdvancedSVCTrainerOpDesc.scala | 4 +- .../SVRTrainer/SklearnAdvancedSVRParameters.java | 4 +- .../SklearnAdvancedSVRTrainerOpDesc.scala | 4 +- .../sklearnAdvanced/base/HyperParameters.java | 6 +- .../base/SklearnAdvancedBaseDesc.scala | 12 +- .../amber/operator/map/MapOpDesc.scala | 8 +- .../amber/operator/map/MapOpExec.scala | 6 +- .../amber/operator/metadata/OPVersion.java | 2 +- .../operator/metadata/OperatorGroupConstants.scala | 2 +- .../metadata/OperatorMetadataGenerator.scala | 10 +- .../operator/metadata/PropertyNameConstants.scala | 2 +- .../annotations/AutofillAttributeName.java | 2 +- .../annotations/AutofillAttributeNameLambda.java | 2 +- .../annotations/AutofillAttributeNameList.java | 2 +- .../annotations/AutofillAttributeNameOnPort1.java | 2 +- .../metadata/annotations/BatchByColumn.java | 2 +- .../annotations/CommonOpDescAnnotation.java | 2 +- .../metadata/annotations/EnablePresets.java | 2 +- .../metadata/annotations/HideAnnotation.java | 2 +- .../operator/metadata/annotations/UIWidget.java | 2 +- .../amber/operator/projection/AttributeUnit.java | 4 +- .../operator/projection/ProjectionOpDesc.scala | 20 +- .../operator/projection/ProjectionOpExec.scala | 8 +- .../randomksampling/RandomKSamplingOpDesc.scala | 16 +- .../randomksampling/RandomKSamplingOpExec.scala | 6 +- .../amber/operator/regex/RegexOpDesc.scala | 18 +- .../amber/operator/regex/RegexOpExec.scala | 8 +- .../ReservoirSamplingOpDesc.scala | 16 +- .../ReservoirSamplingOpExec.scala | 10 +- .../amber/operator/sink/ProgressiveUtils.scala | 4 +- .../sklearn/SklearnAdaptiveBoostingOpDesc.scala | 2 +- .../operator/sklearn/SklearnBaggingOpDesc.scala | 2 +- .../sklearn/SklearnBernoulliNaiveBayesOpDesc.scala | 2 +- .../operator/sklearn/SklearnClassifierOpDesc.scala | 12 +- .../SklearnComplementNaiveBayesOpDesc.scala | 2 +- .../sklearn/SklearnDecisionTreeOpDesc.scala | 2 +- .../sklearn/SklearnDummyClassifierOpDesc.scala | 2 +- .../operator/sklearn/SklearnExtraTreeOpDesc.scala | 2 +- .../operator/sklearn/SklearnExtraTreesOpDesc.scala | 2 +- .../sklearn/SklearnGaussianNaiveBayesOpDesc.scala | 2 +- .../sklearn/SklearnGradientBoostingOpDesc.scala | 2 +- .../amber/operator/sklearn/SklearnKNNOpDesc.scala | 2 +- .../sklearn/SklearnLinearRegressionOpDesc.scala | 12 +- .../operator/sklearn/SklearnLinearSVMOpDesc.scala | 2 +- .../SklearnLogisticRegressionCVOpDesc.scala | 2 +- .../sklearn/SklearnLogisticRegressionOpDesc.scala | 2 +- .../SklearnMultiLayerPerceptronOpDesc.scala | 2 +- .../SklearnMultinomialNaiveBayesOpDesc.scala | 2 +- .../sklearn/SklearnNearestCentroidOpDesc.scala | 2 +- .../sklearn/SklearnPassiveAggressiveOpDesc.scala | 2 +- .../operator/sklearn/SklearnPerceptronOpDesc.scala | 2 +- .../operator/sklearn/SklearnPredictionOpDesc.scala | 12 +- .../SklearnProbabilityCalibrationOpDesc.scala | 2 +- .../sklearn/SklearnRandomForestOpDesc.scala | 2 +- .../operator/sklearn/SklearnRidgeCVOpDesc.scala | 2 +- .../operator/sklearn/SklearnRidgeOpDesc.scala | 2 +- .../amber/operator/sklearn/SklearnSDGOpDesc.scala | 2 +- .../amber/operator/sklearn/SklearnSVMOpDesc.scala | 2 +- .../SklearnTrainingAdaptiveBoostingOpDesc.scala | 2 +- .../training/SklearnTrainingBaggingOpDesc.scala | 2 +- .../SklearnTrainingBernoulliNaiveBayesOpDesc.scala | 2 +- ...SklearnTrainingComplementNaiveBayesOpDesc.scala | 2 +- .../SklearnTrainingDecisionTreeOpDesc.scala | 2 +- .../SklearnTrainingDummyClassifierOpDesc.scala | 2 +- .../training/SklearnTrainingExtraTreeOpDesc.scala | 2 +- .../training/SklearnTrainingExtraTreesOpDesc.scala | 2 +- .../SklearnTrainingGaussianNaiveBayesOpDesc.scala | 2 +- .../SklearnTrainingGradientBoostingOpDesc.scala | 2 +- .../training/SklearnTrainingKNNOpDesc.scala | 2 +- .../SklearnTrainingLinearRegressionOpDesc.scala | 2 +- .../training/SklearnTrainingLinearSVMOpDesc.scala | 2 +- ...SklearnTrainingLogisticRegressionCVOpDesc.scala | 2 +- .../SklearnTrainingLogisticRegressionOpDesc.scala | 2 +- ...SklearnTrainingMultiLayerPerceptronOpDesc.scala | 2 +- ...klearnTrainingMultinomialNaiveBayesOpDesc.scala | 2 +- .../SklearnTrainingNearestCentroidOpDesc.scala | 2 +- .../sklearn/training/SklearnTrainingOpDesc.scala | 12 +- .../SklearnTrainingPassiveAggressiveOpDesc.scala | 2 +- .../training/SklearnTrainingPerceptronOpDesc.scala | 2 +- ...learnTrainingProbabilityCalibrationOpDesc.scala | 2 +- .../SklearnTrainingRandomForestOpDesc.scala | 2 +- .../training/SklearnTrainingRidgeCVOpDesc.scala | 2 +- .../training/SklearnTrainingRidgeOpDesc.scala | 2 +- .../training/SklearnTrainingSDGOpDesc.scala | 2 +- .../training/SklearnTrainingSVMOpDesc.scala | 2 +- .../amber/operator/sleep/SleepOpDesc.scala | 16 +- .../amber/operator/sleep/SleepOpExec.scala | 8 +- .../amber/operator/sort/SortCriteriaUnit.java | 6 +- .../amber/operator/sort/SortOpDesc.scala | 10 +- .../amber/operator/sort/SortPreference.java | 2 +- .../operator/sort/StableMergeSortOpDesc.scala | 16 +- .../operator/sort/StableMergeSortOpExec.scala | 8 +- .../sortPartitions/SortPartitionsOpDesc.scala | 18 +- .../sortPartitions/SortPartitionsOpExec.scala | 8 +- .../amber/operator/source/BufferedBlockReader.java | 2 +- .../source/PythonSourceOperatorDescriptor.scala | 4 +- .../operator/source/SourceOperatorDescriptor.scala | 6 +- .../apis/reddit/RedditSearchSourceOpDesc.scala | 10 +- .../apis/reddit/RedditSourceOperatorFunction.java | 2 +- .../source/apis/twitter/TwitterSourceOpDesc.scala | 8 +- .../source/apis/twitter/TwitterSourceOpExec.scala | 6 +- .../v2/TwitterFullArchiveSearchSourceOpDesc.scala | 18 +- .../v2/TwitterFullArchiveSearchSourceOpExec.scala | 10 +- .../twitter/v2/TwitterSearchSourceOpDesc.scala | 18 +- .../twitter/v2/TwitterSearchSourceOpExec.scala | 10 +- .../source/apis/twitter/v2/TwitterUtils.scala | 4 +- .../operator/source/cache/CacheSourceOpExec.scala | 10 +- .../operator/source/fetcher/DecodingMethod.java | 2 +- .../operator/source/fetcher/RandomUserAgent.java | 2 +- .../operator/source/fetcher/URLFetchUtil.scala | 2 +- .../operator/source/fetcher/URLFetcherOpDesc.scala | 18 +- .../operator/source/fetcher/URLFetcherOpExec.scala | 10 +- .../operator/source/scan/AutoClosingIterator.scala | 2 +- .../operator/source/scan/FileAttributeType.java | 9 +- .../operator/source/scan/FileDecodingMethod.java | 2 +- .../source/scan/FileScanSourceOpDesc.scala | 18 +- .../source/scan/FileScanSourceOpExec.scala | 28 +- .../operator/source/scan/ScanSourceOpDesc.scala | 12 +- .../source/scan/arrow/ArrowSourceOpDesc.scala | 20 +- .../source/scan/arrow/ArrowSourceOpExec.scala | 12 +- .../source/scan/csv/CSVScanSourceOpDesc.scala | 20 +- .../source/scan/csv/CSVScanSourceOpExec.scala | 10 +- .../scan/csv/ParallelCSVScanSourceOpDesc.scala | 20 +- .../scan/csv/ParallelCSVScanSourceOpExec.scala | 12 +- .../scan/csvOld/CSVOldScanSourceOpDesc.scala | 20 +- .../scan/csvOld/CSVOldScanSourceOpExec.scala | 10 +- .../source/scan/json/JSONLScanSourceOpDesc.scala | 20 +- .../source/scan/json/JSONLScanSourceOpExec.scala | 14 +- .../amber/operator/source/scan/json/JSONUtil.scala | 2 +- .../source/scan/text/TextInputSourceOpDesc.scala | 20 +- .../source/scan/text/TextInputSourceOpExec.scala | 12 +- .../source/scan/text/TextSourceOpDesc.scala | 6 +- .../operator/source/sql/SQLSourceOpDesc.scala | 8 +- .../operator/source/sql/SQLSourceOpExec.scala | 10 +- .../source/sql/asterixdb/AsterixDBConnUtil.scala | 2 +- .../sql/asterixdb/AsterixDBSourceOpDesc.scala | 24 +- .../sql/asterixdb/AsterixDBSourceOpExec.scala | 12 +- .../operator/source/sql/mysql/MySQLConnUtil.scala | 2 +- .../source/sql/mysql/MySQLSourceOpDesc.scala | 18 +- .../source/sql/mysql/MySQLSourceOpExec.scala | 10 +- .../source/sql/postgresql/PostgreSQLConnUtil.scala | 2 +- .../sql/postgresql/PostgreSQLSourceOpDesc.scala | 20 +- .../sql/postgresql/PostgreSQLSourceOpExec.scala | 10 +- .../amber/operator/split/SplitOpDesc.scala | 18 +- .../amber/operator/split/SplitOpExec.scala | 10 +- .../substringSearch/SubstringSearchOpDesc.scala | 18 +- .../substringSearch/SubstringSearchOpExec.scala | 8 +- .../SymmetricDifferenceOpDesc.scala | 14 +- .../SymmetricDifferenceOpExec.scala | 6 +- .../operator/timeSeriesPlot/TimeSeriesPlot.scala | 14 +- .../operator/typecasting/TypeCastingOpDesc.scala | 18 +- .../operator/typecasting/TypeCastingOpExec.scala | 8 +- .../operator/typecasting/TypeCastingUnit.java | 6 +- .../amber/operator/udf/java/JavaUDFOpDesc.scala | 20 +- .../python/DualInputPortsPythonUDFOpDescV2.scala | 14 +- .../operator/udf/python/LambdaAttributeUnit.java | 8 +- .../udf/python/PythonLambdaFunctionOpDesc.scala | 10 +- .../udf/python/PythonTableReducerOpDesc.scala | 10 +- .../operator/udf/python/PythonUDFOpDescV2.scala | 14 +- .../python/source/PythonUDFSourceOpDescV2.scala | 14 +- .../amber/operator/udf/r/RUDFOpDesc.scala | 14 +- .../amber/operator/udf/r/RUDFSourceOpDesc.scala | 14 +- .../amber/operator/union/UnionOpDesc.scala | 14 +- .../amber/operator/union/UnionOpExec.scala | 6 +- .../operator/unneststring/UnnestStringOpDesc.scala | 25 +- .../operator/unneststring/UnnestStringOpExec.scala | 8 +- .../operator/util/OperatorDescriptorUtils.scala | 2 +- .../visualization/DotPlot/DotPlotOpDesc.scala | 14 +- .../IcicleChart/IcicleChartOpDesc.scala | 16 +- .../operator/visualization/ImageUtility.scala | 2 +- .../ImageViz/ImageVisualizerOpDesc.scala | 14 +- .../ScatterMatrixChartOpDesc.scala | 14 +- .../visualization/barChart/BarChartOpDesc.scala | 14 +- .../boxViolinPlot/BoxViolinPlotOpDesc.scala | 14 +- .../BoxViolinPlotQuartileFunction.java | 2 +- .../bubbleChart/BubbleChartOpDesc.scala | 14 +- .../bulletChart/BulletChartOpDesc.scala | 14 +- .../bulletChart/BulletChartStepDefinition.scala | 2 +- .../candlestickChart/CandlestickChartOpDesc.scala | 14 +- .../choroplethMap/ChoroplethMapOpDesc.scala | 14 +- .../continuousErrorBands/BandConfig.scala | 6 +- .../ContinuousErrorBandsOpDesc.scala | 12 +- .../contourPlot/ContourPlotColoringFunction.java | 2 +- .../contourPlot/ContourPlotOpDesc.scala | 14 +- .../dendrogram/DendrogramOpDesc.scala | 14 +- .../dumbbellPlot/DumbbellDotConfig.scala | 4 +- .../dumbbellPlot/DumbbellPlotOpDesc.scala | 14 +- .../FigureFactoryTableConfig.scala | 4 +- .../FigureFactoryTableOpDesc.scala | 12 +- .../filledAreaPlot/FilledAreaPlotOpDesc.scala | 14 +- .../funnelPlot/FunnelPlotOpDesc.scala | 14 +- .../ganttChart/GanttChartOpDesc.scala | 14 +- .../gaugeChart/GaugeChartOpDesc.scala | 14 +- .../visualization/gaugeChart/GaugeChartSteps.scala | 2 +- .../visualization/heatMap/HeatMapOpDesc.scala | 14 +- .../hierarchychart/HierarchyChartOpDesc.scala | 14 +- .../hierarchychart/HierarchyChartType.java | 2 +- .../hierarchychart/HierarchySection.scala | 4 +- .../histogram/HistogramChartOpDesc.scala | 14 +- .../histogram2d/Histogram2DOpDesc.scala | 14 +- .../histogram2d/NormalizationType.java | 2 +- .../visualization/htmlviz/HtmlVizOpDesc.scala | 27 +- .../visualization/htmlviz/HtmlVizOpExec.scala | 8 +- .../visualization/lineChart/LineChartOpDesc.scala | 12 +- .../visualization/lineChart/LineConfig.scala | 4 +- .../operator/visualization/lineChart/LineMode.java | 2 +- .../nestedTable/NestedTableConfig.scala | 4 +- .../nestedTable/NestedTableOpDesc.scala | 12 +- .../networkGraph/NetworkGraphOpDesc.scala | 14 +- .../visualization/pieChart/PieChartOpDesc.scala | 14 +- .../quiverPlot/QuiverPlotOpDesc.scala | 14 +- .../RangeSliderHandleDuplicateFunction.java | 2 +- .../rangeSlider/RangeSliderOpDesc.scala | 14 +- .../sankeyDiagram/SankeyDiagramOpDesc.scala | 14 +- .../scatter3DChart/Scatter3dChartOpDesc.scala | 14 +- .../scatterplot/ScatterplotOpDesc.scala | 14 +- .../stripChart/StripChartOpDesc.scala | 14 +- .../visualization/tablesChart/TablesConfig.scala | 4 +- .../tablesChart/TablesPlotOpDesc.scala | 12 +- .../ternaryPlot/TernaryPlotOpDesc.scala | 14 +- .../visualization/treeplot/TreeplotOpDesc.scala | 14 +- .../visualization/urlviz/UrlVizOpDesc.scala | 27 +- .../visualization/urlviz/UrlVizOpExec.scala | 8 +- .../volcanoPlot/VolcanoPlotOpDesc.scala | 14 +- .../waterfallChart/WaterfallChartOpDesc.scala | 14 +- .../visualization/wordCloud/WordCloudOpDesc.scala | 16 +- .../amber/util/ObjectMapperUtils.scala | 4 +- .../amber/operator/aggregate/AggregateOpSpec.scala | 6 +- .../CartesianProductOpExecSpec.scala | 6 +- .../dictionary/DictionaryMatcherOpExecSpec.scala | 8 +- .../operator/difference/DifferenceOpExecSpec.scala | 4 +- .../operator/distinct/DistinctOpExecSpec.scala | 4 +- .../filter/SpecializedFilterOpExecSpec.scala | 6 +- .../amber/operator/hashJoin/HashJoinOpSpec.scala | 10 +- .../operator/intersect/IntersectOpExecSpec.scala | 8 +- .../operator/intervalJoin/IntervalOpExecSpec.scala | 10 +- .../keywordSearch/KeywordSearchOpExecSpec.scala | 6 +- .../operator/projection/ProjectionOpDescSpec.scala | 6 +- .../operator/projection/ProjectionOpExecSpec.scala | 6 +- .../operator/sort/StableMergeSortOpExecSpec.scala | 6 +- .../sortPartitions/SortPartitionsOpExecSpec.scala | 6 +- .../source/fetcher/URLFetcherOpExecSpec.scala | 6 +- .../source/scan/FileScanSourceOpExecSpec.scala | 162 +++++++ .../source/scan/csv/CSVScanSourceOpDescSpec.scala | 13 +- .../scan/text/FileScanSourceOpDescSpec.scala | 12 +- .../scan/text/TextInputSourceOpDescSpec.scala | 10 +- .../SymmetricDifferenceOpExecSpec.scala | 4 +- .../typecasting/TypeCastingOpExecSpec.scala | 6 +- .../python/PythonLambdaFunctionOpDescSpec.scala | 6 +- .../unneststring/UnnestStringOpExecSpec.scala | 8 +- .../visualization/DotPlot/DotPlotOpDescSpec.scala | 2 +- .../ImageViz/ImageVisualizerOpDescSpec.scala | 2 +- .../barChart/BarChartOpDescSpec.scala | 2 +- .../bubbleChart/BubbleChartOpDescSpec.scala | 2 +- .../filledAreaPlot/FilledAreaPlotOpDescSpec.scala | 2 +- .../ganttChart/GanttChartOpDescSpec.scala | 2 +- .../hierarchychart/HierarchyChartOpDescSpec.scala | 2 +- .../visualization/htmlviz/HtmlVizOpExecSpec.scala | 8 +- .../pieChart/PieChartOpDescSpec.scala | 2 +- .../scatterplot/ScatterPlotOpDescSpec.scala | 2 +- .../{ => texera}/amber/util/ArrowUtilsSpec.scala | 6 +- .../service/ComputingUnitManagingService.scala | 2 +- .../resource/ComputingUnitManagingResource.scala | 2 +- .../org/apache/texera/service/ConfigService.scala | 2 +- .../texera/service/resource/ConfigResource.scala | 1 + file-service/build.sbt | 3 - .../org/apache/texera/service/FileService.scala | 4 +- .../texera/service/resource/DatasetResource.scala | 8 +- .../service/type/dataset/DatasetFileNode.scala | 2 +- .../org/apache/texera/service/MockLakeFS.scala | 2 +- .../service/resource/DatasetResourceSpec.scala | 2 +- frontend/package.json | 7 +- frontend/proxy.config.json | 10 + frontend/src/app/app.module.ts | 13 +- .../app/common/service/gui-config.service.mock.ts | 1 + frontend/src/app/common/type/gui-config.ts | 1 + frontend/src/app/common/type/physical-plan.ts | 4 +- .../amber/engine/common/virtualidentity.ts | 2 +- .../{ => texera}/amber/engine/common/workflow.ts | 2 +- .../src/app/common/util/port-identity-serde.ts | 2 +- .../agent-chat/agent-chat.component.html | 424 ++++++++++++++++ .../agent-chat/agent-chat.component.scss | 243 ++++++++++ .../agent-chat/agent-chat.component.spec.ts | 65 +++ .../agent-panel/agent-chat/agent-chat.component.ts | 277 +++++++++++ .../agent-panel/agent-panel.component.html | 138 ++++++ .../agent-panel/agent-panel.component.scss | 164 +++++++ .../component/agent-panel/agent-panel.component.ts | 200 ++++++++ .../agent-registration.component.html | 101 ++++ .../agent-registration.component.scss | 147 ++++++ .../agent-registration.component.ts | 112 +++++ .../app/workspace/component/menu/menu.component.ts | 6 +- .../workflow-editor/workflow-editor.component.scss | 4 - .../workflow-editor/workflow-editor.component.ts | 3 +- .../workspace/component/workspace.component.html | 1 + .../app/workspace/component/workspace.component.ts | 4 + .../compile-workflow/workflow-compiling.service.ts | 11 + .../workspace/service/copilot/copilot-prompts.ts | 29 +- .../copilot/texera-copilot-manager.service.spec.ts | 259 ++++++++++ .../copilot/texera-copilot-manager.service.ts | 248 ++++++++++ .../service/copilot/texera-copilot.spec.ts | 189 ++++++++ .../workspace/service/copilot/texera-copilot.ts | 388 +++++++++++++++ .../current-workflow-editing-observing-tools.ts | 127 +++++ .../copilot/tool/react-step-operator-parser.ts | 152 ++++++ .../service/copilot/tool/tools-utility.ts | 138 ++++++ .../copilot/tool/workflow-metadata-tools.ts | 140 ++++++ .../workflow-graph/util/workflow-util.service.ts | 15 +- frontend/yarn.lock | 106 ++++ .../amber/compiler/WorkflowCompiler.scala | 21 +- .../amber/compiler/model/LogicalLink.scala | 6 +- .../amber/compiler/model/LogicalPlan.scala | 12 +- .../amber/compiler/model/LogicalPlanPojo.scala | 4 +- .../texera/service/WorkflowCompilingService.scala | 4 +- .../resource/WorkflowCompilationResource.scala | 14 +- .../resource/WorkflowCompilationResourceSpec.scala | 18 +- 800 files changed, 10631 insertions(+), 4532 deletions(-) diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/ControllerProcessor.scala index 0cd28e0ca6,7a8e94cf3a..5c5a29755a --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/ControllerProcessor.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/ControllerProcessor.scala @@@ -27,14 -27,14 +27,14 @@@ import org.apache.texera.amber.engine.a AkkaMessageTransferService, AmberProcessor } - import org.apache.amber.engine.architecture.controller.execution.WorkflowExecution - import org.apache.amber.engine.architecture.logreplay.ReplayLogManager - import org.apache.amber.engine.architecture.scheduling.WorkflowExecutionCoordinator - import org.apache.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage - import org.apache.amber.engine.common.ambermessage.WorkflowFIFOMessage + import org.apache.texera.amber.engine.architecture.controller.execution.WorkflowExecution + import org.apache.texera.amber.engine.architecture.logreplay.ReplayLogManager + import org.apache.texera.amber.engine.architecture.scheduling.WorkflowExecutionCoordinator + import org.apache.texera.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage + import org.apache.texera.amber.engine.common.ambermessage.WorkflowFIFOMessage class ControllerProcessor( - workflowContext: WorkflowContext, + val workflowContext: WorkflowContext, controllerConfig: ControllerConfig, actorId: ActorVirtualIdentity, outputHandler: Either[MainThreadDelegateMessage, WorkflowFIFOMessage] => Unit diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/promisehandlers/PortCompletedHandler.scala index c4de572761,0add56eba3..a2c7894bc2 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/promisehandlers/PortCompletedHandler.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/promisehandlers/PortCompletedHandler.scala @@@ -17,19 -17,23 +17,19 @@@ * under the License. */ - package org.apache.amber.engine.architecture.controller.promisehandlers + package org.apache.texera.amber.engine.architecture.controller.promisehandlers import com.twitter.util.Future - import org.apache.amber.core.WorkflowRuntimeException - import org.apache.amber.core.storage.DocumentFactory - import org.apache.amber.core.workflow.GlobalPortIdentity - import org.apache.amber.core.workflow.cache.FingerprintUtil - import org.apache.amber.engine.architecture.controller.{ControllerAsyncRPCHandlerInitializer, FatalError} - import org.apache.amber.engine.architecture.rpc.controlcommands.{AsyncRPCContext, PortCompletedRequest, QueryStatisticsRequest} - import org.apache.amber.engine.architecture.rpc.controlreturns.EmptyReturn - import org.apache.amber.engine.common.virtualidentity.util.CONTROLLER - import org.apache.amber.util.VirtualIdentityUtils + import org.apache.texera.amber.core.WorkflowRuntimeException ++import org.apache.texera.amber.core.storage.DocumentFactory + import org.apache.texera.amber.core.workflow.GlobalPortIdentity -import org.apache.texera.amber.engine.architecture.controller.{ - ControllerAsyncRPCHandlerInitializer, - FatalError -} -import org.apache.texera.amber.engine.architecture.rpc.controlcommands.{ - AsyncRPCContext, - PortCompletedRequest, - QueryStatisticsRequest -} ++import org.apache.texera.amber.core.workflow.cache.FingerprintUtil ++import org.apache.texera.amber.engine.architecture.controller.{ControllerAsyncRPCHandlerInitializer, FatalError} ++import org.apache.texera.amber.engine.architecture.rpc.controlcommands.{AsyncRPCContext, PortCompletedRequest, QueryStatisticsRequest} + import org.apache.texera.amber.engine.architecture.rpc.controlreturns.EmptyReturn + import org.apache.texera.amber.engine.common.virtualidentity.util.CONTROLLER + import org.apache.texera.amber.util.VirtualIdentityUtils +import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource /** Notify the completion of a port: * - For input port, it means the worker has finished consuming and processing all the data diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala index 5d9233c035,0b68aad221..02ed38bbaf --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGenerator.scala @@@ -29,8 -29,7 +29,9 @@@ import org.apache.texera.amber.engine.a OutputPortConfig, ResourceConfig } - import org.apache.amber.engine.common.AmberLogging - import org.apache.amber.util.serde.GlobalPortIdentitySerde + import org.apache.texera.amber.engine.common.AmberLogging ++import org.apache.texera.amber.engine.common.AmberLogging ++import org.apache.texera.amber.util.serde.GlobalPortIdentitySerde import org.jgrapht.Graph import org.jgrapht.alg.connectivity.BiconnectivityInspector import org.jgrapht.graph.{DirectedAcyclicGraph, DirectedPseudograph} diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostEstimator.scala index 2fe09e815d,d86101a1f0..02ff87d2c0 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostEstimator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/CostEstimator.scala @@@ -17,19 -17,22 +17,19 @@@ * under the License. */ - package org.apache.amber.engine.architecture.scheduling + package org.apache.texera.amber.engine.architecture.scheduling - import org.apache.amber.core.virtualidentity.ActorVirtualIdentity - import org.apache.amber.core.workflow.WorkflowContext - import org.apache.amber.engine.architecture.scheduling.DefaultCostEstimator.DEFAULT_OPERATOR_COST - import org.apache.amber.engine.architecture.scheduling.config.{ -import org.apache.texera.amber.core.storage.DocumentFactory -import org.apache.texera.amber.core.tuple.Tuple + import org.apache.texera.amber.core.virtualidentity.ActorVirtualIdentity + import org.apache.texera.amber.core.workflow.WorkflowContext + import org.apache.texera.amber.engine.architecture.scheduling.DefaultCostEstimator.DEFAULT_OPERATOR_COST -import org.apache.texera.amber.engine.architecture.scheduling.config.ResourceConfig ++import org.apache.texera.amber.engine.architecture.scheduling.config.{ + InputPortConfig, + IntermediateInputPortConfig, + OutputPortConfig, + ResourceConfig +} - import org.apache.amber.engine.architecture.scheduling.resourcePolicies.ResourceAllocator - import org.apache.amber.engine.common.AmberLogging + import org.apache.texera.amber.engine.architecture.scheduling.resourcePolicies.ResourceAllocator + import org.apache.texera.amber.engine.common.AmberLogging -import org.apache.texera.dao.SqlServer -import org.apache.texera.dao.SqlServer.withTransaction -import org.apache.texera.dao.jooq.generated.Tables.{WORKFLOW_EXECUTIONS, WORKFLOW_VERSION} - -import java.net.URI -import scala.util.{Failure, Success, Try} /** * A cost estimator should estimate a cost of running a region under the given resource constraints as units. diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala index 54fd6ef703,7e5b228801..1bcd478b9a --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/RegionExecutionCoordinator.scala @@@ -21,22 -21,39 +21,22 @@@ package org.apache.texera.amber.engine. import org.apache.pekko.pattern.gracefulStop import com.twitter.util.{Future, Return, Throw} - import org.apache.amber.core.storage.DocumentFactory - import org.apache.amber.core.storage.VFSURIFactory.decodeURI - import org.apache.amber.core.virtualidentity.ActorVirtualIdentity - import org.apache.amber.core.workflow.{GlobalPortIdentity, PhysicalLink, PhysicalOp} - import org.apache.amber.engine.architecture.common.{AkkaActorRefMappingService, AkkaActorService, ExecutorDeployment} - import org.apache.amber.engine.architecture.controller.execution.{OperatorExecution, RegionExecution, WorkflowExecution} - import org.apache.amber.engine.architecture.controller.{ControllerConfig, ExecutionStateUpdate, ExecutionStatsUpdate, WorkerAssignmentUpdate} - import org.apache.amber.engine.architecture.rpc.controlcommands._ - import org.apache.amber.engine.architecture.rpc.controlreturns.EmptyReturn - import org.apache.amber.engine.architecture.scheduling.config.{InputPortConfig, OperatorConfig, OutputPortConfig, ResourceConfig, WorkerConfig} - import org.apache.amber.engine.architecture.sendsemantics.partitionings.Partitioning - import org.apache.amber.engine.architecture.worker.statistics.{PortTupleMetricsMapping, TupleMetrics, WorkerState, WorkerStatistics} - import org.apache.amber.engine.common.AmberLogging - import org.apache.amber.engine.common.FutureBijection._ - import org.apache.amber.engine.common.rpc.AsyncRPCClient - import org.apache.amber.engine.common.virtualidentity.util.CONTROLLER + import org.apache.texera.amber.core.storage.DocumentFactory + import org.apache.texera.amber.core.storage.VFSURIFactory.decodeURI + import org.apache.texera.amber.core.virtualidentity.ActorVirtualIdentity + import org.apache.texera.amber.core.workflow.{GlobalPortIdentity, PhysicalLink, PhysicalOp} -import org.apache.texera.amber.engine.architecture.common.{ - AkkaActorRefMappingService, - AkkaActorService, - ExecutorDeployment -} -import org.apache.texera.amber.engine.architecture.controller.execution.{ - OperatorExecution, - RegionExecution, - WorkflowExecution -} -import org.apache.texera.amber.engine.architecture.controller.{ - ControllerConfig, - ExecutionStatsUpdate, - WorkerAssignmentUpdate -} ++import org.apache.texera.amber.engine.architecture.common.{AkkaActorRefMappingService, AkkaActorService, ExecutorDeployment} ++import org.apache.texera.amber.engine.architecture.controller.execution.{OperatorExecution, RegionExecution, WorkflowExecution} ++import org.apache.texera.amber.engine.architecture.controller.{ControllerConfig, ExecutionStateUpdate, ExecutionStatsUpdate, WorkerAssignmentUpdate} + import org.apache.texera.amber.engine.architecture.rpc.controlcommands._ + import org.apache.texera.amber.engine.architecture.rpc.controlreturns.EmptyReturn -import org.apache.texera.amber.engine.architecture.scheduling.config.{ - InputPortConfig, - OperatorConfig, - OutputPortConfig, - ResourceConfig -} ++import org.apache.texera.amber.engine.architecture.scheduling.config.{InputPortConfig, OperatorConfig, OutputPortConfig, ResourceConfig, WorkerConfig} + import org.apache.texera.amber.engine.architecture.sendsemantics.partitionings.Partitioning -import org.apache.texera.amber.engine.architecture.worker.statistics.WorkerState ++import org.apache.texera.amber.engine.architecture.worker.statistics.{PortTupleMetricsMapping, TupleMetrics, WorkerState, WorkerStatistics} + import org.apache.texera.amber.engine.common.AmberLogging + import org.apache.texera.amber.engine.common.FutureBijection._ + import org.apache.texera.amber.engine.common.rpc.AsyncRPCClient + import org.apache.texera.amber.engine.common.virtualidentity.util.CONTROLLER import org.apache.texera.web.SessionState import org.apache.texera.web.model.websocket.event.RegionStateEvent import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala index 72c575ddd3,05585f88d8..dd6c1041da --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/scheduling/WorkflowExecutionCoordinator.scala @@@ -21,11 -21,14 +21,14 @@@ package org.apache.texera.amber.engine. import com.twitter.util.Future import com.typesafe.scalalogging.LazyLogging - import org.apache.amber.core.workflow.{GlobalPortIdentity, PhysicalLink} - import org.apache.amber.engine.architecture.common.{AkkaActorRefMappingService, AkkaActorService} - import org.apache.amber.engine.architecture.controller.{ControllerConfig, ExecutionStateUpdate} - import org.apache.amber.engine.architecture.controller.execution.WorkflowExecution - import org.apache.amber.engine.common.rpc.AsyncRPCClient + import org.apache.texera.amber.core.workflow.{GlobalPortIdentity, PhysicalLink} + import org.apache.texera.amber.engine.architecture.common.{ + AkkaActorRefMappingService, + AkkaActorService + } -import org.apache.texera.amber.engine.architecture.controller.ControllerConfig ++import org.apache.texera.amber.engine.architecture.controller.{ControllerConfig, ExecutionStateUpdate} + import org.apache.texera.amber.engine.architecture.controller.execution.WorkflowExecution + import org.apache.texera.amber.engine.common.rpc.AsyncRPCClient import scala.collection.mutable diff --cc amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala index cf7f999d5f,741687e02c..5e6683024a --- a/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala @@@ -20,17 -20,14 +20,17 @@@ package org.apache.texera.web.service import com.typesafe.scalalogging.LazyLogging - import org.apache.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity} - import org.apache.amber.core.workflow.WorkflowContext - import org.apache.amber.engine.architecture.controller.{ControllerConfig, Workflow} - import org.apache.amber.engine.architecture.rpc.controlcommands.EmptyRequest - import org.apache.amber.engine.architecture.rpc.controlreturns.WorkflowAggregatedState._ - import org.apache.amber.engine.common.Utils - import org.apache.amber.engine.common.client.AmberClient - import org.apache.amber.engine.common.executionruntimestate.ExecutionMetadataStore + import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity} + import org.apache.texera.amber.core.workflow.WorkflowContext + import org.apache.texera.amber.engine.architecture.controller.{ControllerConfig, Workflow} + import org.apache.texera.amber.engine.architecture.rpc.controlcommands.EmptyRequest + import org.apache.texera.amber.engine.architecture.rpc.controlreturns.WorkflowAggregatedState._ + import org.apache.texera.amber.engine.common.Utils + import org.apache.texera.amber.engine.common.client.AmberClient + import org.apache.texera.amber.engine.common.executionruntimestate.ExecutionMetadataStore +import org.apache.amber.core.workflow.cache.FingerprintUtil +import org.apache.amber.core.workflow.{CachedOutput, GlobalPortIdentity} +import org.apache.amber.util.serde.GlobalPortIdentitySerde.SerdeOps import org.apache.texera.web.model.websocket.event.{ TexeraWebSocketEvent, WorkflowErrorEvent, diff --cc common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala index d44bd63c3c,88ebcb068f..c331de2def --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/WorkflowSettings.scala @@@ -17,22 -17,9 +17,22 @@@ * under the License. */ - package org.apache.amber.core.workflow + package org.apache.texera.amber.core.workflow +import org.apache.amber.core.virtualidentity.ExecutionIdentity + +import java.net.URI + +case class CachedOutput( + resultUri: URI, + fingerprintJson: String, + tupleCount: Option[Long], + sourceExecutionId: Option[ExecutionIdentity] +) + case class WorkflowSettings( dataTransferBatchSize: Int, - outputPortsNeedingStorage: Set[GlobalPortIdentity] = Set.empty + outputPortsNeedingStorage: Set[GlobalPortIdentity] = Set.empty, + // serialized GlobalPortIdentity -> cached output + cachedOutputs: Map[String, CachedOutput] = Map.empty )
