This is an automated email from the ASF dual-hosted git repository. linxinyuan pushed a commit to branch xinyuan-cm-for-loop in repository https://gitbox.apache.org/repos/asf/texera.git
commit af3c697e5ee5b21d78b2093b93576896c65a58bd Merge: 6f49372567 27041a2b3c Author: Xinyuan Lin <[email protected]> AuthorDate: Wed Jan 21 18:48:01 2026 -0800 Merge branch 'main' into xinyuan-cm-for-loop .asf.yaml | 3 +- .github/release/vote-email-template.md | 85 + .github/workflows/build-and-push-images.yml | 513 ++++++ .github/workflows/check-header.yml | 2 +- .github/workflows/create-release-candidate.yml | 340 ++++ .github/workflows/github-action-build.yml | 15 +- .github/workflows/issue-triage.yml | 89 + .github/workflows/lint-pr.yml | 2 +- .licenserc.yaml | 15 + .run/AccessControlService.run.xml | 34 + .run/ComputingUnitManagingService.run.xml | 34 + .run/ComputingUnitMaster.run.xml | 34 + .run/ComputingUnitWorker.run.xml | 34 + .run/ConfigService.run.xml | 34 + .run/FileService.run.xml | 34 + .run/TexeraWebApplication.run.xml | 34 + .run/WorkflowCompilingService.run.xml | 34 + .run/frontend.run.xml | 30 + .run/texera micro services.run.xml | 31 + .run/texera-lakefs.run.xml | 28 + LICENSE | 38 + SECURITY.md | 272 ++++ .../texera/service/AccessControlService.scala | 11 +- .../service/resource/AccessControlResource.scala | 128 +- amber/build.sbt | 45 +- amber/operator-requirements.txt | 2 + amber/requirements.txt | 13 +- .../engine/architecture/rpc/controlcommands.proto | 18 +- .../architecture/rpc/controllerservice.proto | 6 +- .../engine/architecture/rpc/controlreturns.proto | 6 +- .../engine/architecture/rpc/testerservice.proto | 6 +- .../engine/architecture/rpc/workerservice.proto | 6 +- .../architecture/sendsemantics/partitionings.proto | 4 +- .../engine/architecture/worker/statistics.proto | 4 +- .../amber/engine/common/actormessage.proto | 2 +- .../amber/engine/common/ambermessage.proto | 8 +- .../engine/common/executionruntimestate.proto | 12 +- .../handlers/actorcommand/actor_handler_base.py | 2 +- .../handlers/actorcommand/backpressure_handler.py | 6 +- .../handlers/actorcommand/credit_update_handler.py | 2 +- .../handlers/control/add_input_channel_handler.py | 2 +- .../handlers/control/add_partitioning_handler.py | 3 +- .../handlers/control/assign_port_handler.py | 5 +- .../handlers/control/control_handler_base.py | 2 +- .../handlers/control/debug_command_handler.py | 2 +- .../handlers/control/end_channel_handler.py | 5 +- .../handlers/control/end_worker_handler.py | 2 +- .../control/evaluate_expression_handler.py | 3 +- .../control/initialize_executor_handler.py | 5 +- .../handlers/control/no_operation_handler.py | 3 +- .../handlers/control/open_executor_handler.py | 6 +- .../handlers/control/pause_worker_handler.py | 3 +- .../handlers/control/query_statistics_handler.py | 5 +- .../control/replay_current_tuple_handler.py | 8 +- .../handlers/control/resume_worker_handler.py | 3 +- .../handlers/control/start_channel_handler.py | 5 +- .../handlers/control/start_worker_handler.py | 6 +- .../handlers/control/update_executor_handler.py | 2 +- .../managers/console_message_manager.py | 2 +- .../python/core/architecture/managers/context.py | 4 +- .../managers/embedded_control_message_manager.py | 10 +- .../core/architecture/managers/executor_manager.py | 47 +- .../core/architecture/managers/pause_manager.py | 4 +- .../architecture/managers/statistics_manager.py | 4 +- .../architecture/managers/test_executor_manager.py | 248 +++ .../architecture/managers/test_pause_manager.py | 2 +- .../architecture/managers/test_state_manager.py | 2 +- .../managers/tuple_processing_manager.py | 2 +- .../core/architecture/packaging/input_manager.py | 5 +- .../core/architecture/packaging/output_manager.py | 8 +- .../core/architecture/rpc/async_rpc_client.py | 7 +- .../core/architecture/rpc/async_rpc_server.py | 6 +- .../sendsemantics/broad_cast_partitioner.py | 6 +- .../hash_based_shuffle_partitioner.py | 6 +- .../sendsemantics/one_to_one_partitioner.py | 6 +- .../core/architecture/sendsemantics/partitioner.py | 6 +- .../range_based_shuffle_partitioner.py | 6 +- .../sendsemantics/round_robin_partitioner.py | 6 +- .../src/main/python/core/models/RTableExecutor.py | 132 -- .../src/main/python/core/models/RTupleExecutor.py | 167 -- amber/src/main/python/core/models/__init__.py | 3 +- .../src/main/python/core/models/internal_queue.py | 7 +- amber/src/main/python/core/models/r_utils.py | 79 - .../src/main/python/core/models/schema/__init__.py | 2 + .../core/models/schema/arrow_schema_utils.py | 63 + .../python/core/models/schema/attribute_type.py | 6 + .../core/models/schema/attribute_type_utils.py | 72 + amber/src/main/python/core/models/schema/schema.py | 19 +- .../main/python/core/models/schema/test_schema.py | 65 + .../main/python/core/models/test_RTableExecutor.py | 539 ------ amber/src/main/python/core/models/test_tuple.py | 102 +- amber/src/main/python/core/models/tuple.py | 39 +- .../main/python/core/models/type/__init__.py} | 5 +- .../main/python/core/models/type/large_binary.py | 98 ++ .../python/core/models/type/test_large_binary.py | 88 + .../main/python/core/runnables/data_processor.py | 2 +- amber/src/main/python/core/runnables/main_loop.py | 12 +- .../main/python/core/runnables/network_receiver.py | 4 +- .../main/python/core/runnables/network_sender.py | 6 +- .../python/core/runnables/test_console_message.py | 6 +- .../main/python/core/runnables/test_main_loop.py | 12 +- .../python/core/runnables/test_network_receiver.py | 6 +- .../main/python/core/storage/document_factory.py | 5 +- .../python/core/storage/iceberg/iceberg_utils.py | 136 +- .../core/storage/iceberg/test_iceberg_document.py | 40 +- .../iceberg/test_iceberg_utils_large_binary.py | 230 +++ .../input_port_materialization_reader_runnable.py | 6 +- .../src/main/python/core/storage/storage_config.py | 19 +- .../main/python/core/storage/vfs_uri_factory.py | 2 +- .../main/python/core/util/buffer/timed_buffer.py | 2 +- .../core/util/console_message/replace_print.py | 2 +- .../core/util/console_message/timed_buffer.py | 4 +- .../core/util/expression_evaluator/__init__.py | 5 +- .../test_expression_evaluator.py | 5 +- .../python/core/util/virtual_identity/__init__.py | 2 +- amber/src/main/python/proto/__init__.py | 16 - amber/src/main/python/proto/org/__init__.py | 16 - amber/src/main/python/proto/org/apache/__init__.py | 16 - .../main/python/proto/org/apache/amber/__init__.py | 16 - .../proto/org/apache/amber/engine/__init__.py | 16 - .../apache/amber/engine/architecture/__init__.py | 16 - .../python/proto/org/apache/texera/__init__.py | 16 - .../proto/org/apache/texera/amber/__init__.py | 0 .../org/apache/{ => texera}/amber/core/__init__.py | 19 +- .../org/apache/texera/amber/engine/__init__.py | 0 .../texera/amber/engine/architecture/__init__.py | 0 .../amber/engine/architecture/rpc/__init__.py | 199 ++- .../engine/architecture/sendsemantics/__init__.py | 19 +- .../amber/engine/architecture/worker/__init__.py | 19 +- .../{ => texera}/amber/engine/common/__init__.py | 19 +- amber/src/main/python/proto/scalapb/__init__.py | 17 - amber/src/main/python/pyproject.toml | 20 +- amber/src/main/python/pytexera/__init__.py | 6 + .../pytexera/storage/dataset_file_document.py | 3 +- .../pytexera/storage/large_binary_input_stream.py | 121 ++ .../pytexera/storage/large_binary_manager.py | 78 + .../pytexera/storage/large_binary_output_stream.py | 244 +++ .../storage/test_large_binary_input_stream.py | 222 +++ .../pytexera/storage/test_large_binary_manager.py | 150 ++ .../storage/test_large_binary_output_stream.py | 238 +++ amber/src/main/python/texera_run_python_worker.py | 8 + .../amber/clustering/ClusterListener.scala | 24 +- .../amber/clustering/SingleNodeListener.scala | 6 +- .../common/AkkaActorRefMappingService.scala | 14 +- .../architecture/common/AkkaActorService.scala | 13 +- .../common/AkkaMessageTransferService.scala | 18 +- .../architecture/common/AmberProcessor.scala | 20 +- .../architecture/common/ExecutorDeployment.scala | 22 +- .../architecture/common/ProcessingStepCursor.scala | 6 +- .../engine/architecture/common/WorkflowActor.scala | 26 +- .../architecture/controller/ClientEvent.scala | 12 +- .../architecture/controller/Controller.scala | 30 +- .../ControllerAsyncRPCHandlerInitializer.scala | 14 +- .../controller/ControllerProcessor.scala | 18 +- .../controller/ControllerTimerService.scala | 14 +- .../controller/GlobalReplayManager.scala | 4 +- .../engine/architecture/controller/Workflow.scala | 4 +- .../controller/WorkflowScheduler.scala | 8 +- .../controller/execution/ChannelExecution.scala | 2 +- .../controller/execution/ExecutionUtils.scala | 11 +- .../controller/execution/LinkExecution.scala | 4 +- .../controller/execution/OperatorExecution.scala | 22 +- .../controller/execution/RegionExecution.scala | 14 +- .../controller/execution/WorkerPortExecution.scala | 2 +- .../controller/execution/WorkflowExecution.scala | 16 +- .../promisehandlers/ConsoleMessageHandler.scala | 8 +- .../promisehandlers/DebugCommandHandler.scala | 10 +- .../EmbeddedControlMessageHandler.scala | 14 +- .../EvaluatePythonExpressionHandler.scala | 10 +- .../promisehandlers/LinkWorkersHandler.scala | 8 +- .../controller/promisehandlers/PauseHandler.scala | 18 +- .../promisehandlers/PortCompletedHandler.scala | 16 +- .../QueryWorkerStatisticsHandler.scala | 19 +- .../controller/promisehandlers/ResumeHandler.scala | 13 +- .../RetrieveWorkflowStateHandler.scala | 16 +- .../promisehandlers/RetryWorkflowHandler.scala | 10 +- .../promisehandlers/StartWorkflowHandler.scala | 13 +- .../TakeGlobalCheckpointHandler.scala | 20 +- .../WorkerExecutionCompletedHandler.scala | 10 +- .../WorkerStateUpdatedHandler.scala | 10 +- .../architecture/deploysemantics/AddressInfo.scala | 4 +- .../deploystrategy/DeployStrategy.scala | 4 +- .../deploysemantics/deploystrategy/OneOnEach.scala | 4 +- .../deploystrategy/RandomDeployment.scala | 4 +- .../deploystrategy/RoundRobinDeployment.scala | 4 +- .../deploysemantics/layer/WorkerExecution.scala | 10 +- .../logreplay/AsyncReplayLogWriter.scala | 10 +- .../architecture/logreplay/EmptyReplayLogger.scala | 9 +- .../architecture/logreplay/OrderEnforcer.scala | 4 +- .../logreplay/ReplayLogGenerator.scala | 8 +- .../architecture/logreplay/ReplayLogManager.scala | 19 +- .../architecture/logreplay/ReplayLogger.scala | 9 +- .../architecture/logreplay/ReplayLoggerImpl.scala | 11 +- .../logreplay/ReplayOrderEnforcer.scala | 4 +- .../messaginglayer/AmberFIFOChannel.scala | 12 +- .../messaginglayer/CongestionControl.scala | 4 +- .../messaginglayer/DeadLetterMonitorActor.scala | 6 +- .../architecture/messaginglayer/FlowControl.scala | 8 +- .../architecture/messaginglayer/InputGateway.scala | 6 +- .../architecture/messaginglayer/InputManager.scala | 18 +- .../messaginglayer/NetworkInputGateway.scala | 8 +- .../messaginglayer/NetworkOutputGateway.scala | 10 +- .../messaginglayer/OrderingEnforcer.scala | 2 +- .../messaginglayer/OutputManager.scala | 28 +- .../architecture/messaginglayer/WorkerPort.scala | 6 +- .../messaginglayer/WorkerTimerService.scala | 21 +- .../pythonworker/PythonProxyClient.scala | 22 +- .../pythonworker/PythonProxyServer.scala | 20 +- .../pythonworker/PythonWorkflowWorker.scala | 34 +- .../pythonworker/WorkerBatchInternalQueue.scala | 12 +- .../scheduling/CostBasedScheduleGenerator.scala | 18 +- .../architecture/scheduling/CostEstimator.scala | 18 +- .../ExpansionGreedyScheduleGenerator.scala | 14 +- .../engine/architecture/scheduling/Region.scala | 8 +- .../scheduling/RegionExecutionCoordinator.scala | 38 +- .../architecture/scheduling/RegionPlan.scala | 4 +- .../engine/architecture/scheduling/Schedule.scala | 2 +- .../scheduling/ScheduleGenerator.scala | 8 +- .../architecture/scheduling/SchedulingUtils.scala | 2 +- .../scheduling/WorkflowExecutionCoordinator.scala | 15 +- .../scheduling/config/ChannelConfig.scala | 6 +- .../scheduling/config/LinkConfig.scala | 8 +- .../scheduling/config/OperatorConfig.scala | 2 +- .../scheduling/config/PortConfig.scala | 4 +- .../scheduling/config/ResourceConfig.scala | 6 +- .../scheduling/config/WorkerConfig.scala | 10 +- .../resourcePolicies/ExecutionClusterInfo.scala | 2 +- .../resourcePolicies/ResourceAllocator.scala | 22 +- .../partitioners/BroadcastPartitioner.scala | 8 +- .../partitioners/HashBasedShufflePartitioner.scala | 8 +- .../partitioners/OneToOnePartitioner.scala | 8 +- .../sendsemantics/partitioners/Partitioner.scala | 14 +- .../RangeBasedShufflePartitioner.scala | 8 +- .../partitioners/RoundRobinPartitioner.scala | 8 +- .../engine/architecture/worker/DPThread.scala | 25 +- .../engine/architecture/worker/DataProcessor.scala | 42 +- .../DataProcessorRPCHandlerInitializer.scala | 16 +- .../worker/EmbeddedControlMessageManager.scala | 12 +- .../engine/architecture/worker/PauseManager.scala | 8 +- .../engine/architecture/worker/PauseType.scala | 4 +- .../architecture/worker/WorkflowWorker.scala | 33 +- .../InputPortMaterializationReaderThread.scala | 30 +- .../managers/OutputPortResultWriterThread.scala | 6 +- .../worker/managers/SerializationManager.scala | 16 +- .../worker/managers/StatisticsManager.scala | 8 +- .../promisehandlers/AddInputChannelHandler.scala | 14 +- .../promisehandlers/AddPartitioningHandler.scala | 14 +- .../worker/promisehandlers/AssignPortHandler.scala | 23 +- .../worker/promisehandlers/EndChannelHandler.scala | 17 +- .../worker/promisehandlers/EndHandler.scala | 11 +- .../promisehandlers/EndIterationHandler.scala | 0 .../FinalizeCheckpointHandler.scala | 14 +- .../FlushNetworkBufferHandler.scala | 11 +- .../InitializeExecutorHandler.scala | 14 +- .../promisehandlers/NextIterationHandler.scala | 0 .../promisehandlers/OpenExecutorHandler.scala | 11 +- .../worker/promisehandlers/PauseHandler.scala | 20 +- .../promisehandlers/PrepareCheckpointHandler.scala | 16 +- .../promisehandlers/QueryStatisticsHandler.scala | 13 +- .../worker/promisehandlers/ResumeHandler.scala | 16 +- .../promisehandlers/RetrieveStateHandler.scala | 11 +- .../promisehandlers/StartChannelHandler.scala | 17 +- .../worker/promisehandlers/StartHandler.scala | 21 +- .../amber/engine/common/AmberConfig.scala | 6 +- .../amber/engine/common/AmberKryoInitializer.scala | 14 +- .../amber/engine/common/AmberLogging.scala | 6 +- .../amber/engine/common/AmberRuntime.scala | 26 +- .../amber/engine/common/CheckpointState.scala | 2 +- .../amber/engine/common/CheckpointSupport.scala | 6 +- .../amber/engine/common/ElidableStatement.scala | 2 +- .../amber/engine/common/FutureBijection.scala | 2 +- .../amber/engine/common/SerializedState.scala | 4 +- .../{ => texera}/amber/engine/common/Utils.scala | 4 +- .../amberexception/BreakpointException.scala | 4 +- .../engine/common/ambermessage/DataPayload.scala | 6 +- .../ambermessage/DirectControlMessagePayload.scala | 2 +- .../common/ambermessage/RecoveryPayload.scala | 6 +- .../ambermessage/WorkflowFIFOMessagePayload.scala | 2 +- .../common/ambermessage/WorkflowMessage.scala | 4 +- .../amber/engine/common/client/AmberClient.scala | 29 +- .../amber/engine/common/client/ClientActor.scala | 39 +- .../amber/engine/common/rpc/AsyncRPCClient.scala | 24 +- .../common/rpc/AsyncRPCHandlerInitializer.scala | 14 +- .../amber/engine/common/rpc/AsyncRPCServer.scala | 17 +- .../common/statetransition/StateManager.scala | 8 +- .../statetransition/WorkerStateManager.scala | 8 +- .../engine/common/storage/EmptyRecordStorage.scala | 4 +- .../engine/common/storage/HDFSRecordStorage.scala | 4 +- .../common/storage/SequentialRecordStorage.scala | 6 +- .../engine/common/storage/VFSRecordStorage.scala | 4 +- .../amber/engine/common/virtualidentity/util.scala | 4 +- .../{ => texera}/amber/error/ErrorUtils.scala | 12 +- .../apache/texera/web/ComputingUnitMaster.scala | 26 +- .../apache/texera/web/ComputingUnitWorker.scala | 2 +- .../texera/web/ServletAwareConfigurator.scala | 2 + .../scala/org/apache/texera/web/SessionState.scala | 2 +- .../apache/texera/web/TexeraWebApplication.scala | 8 +- .../texera/web/WorkflowLifecycleManager.scala | 10 +- .../apache/texera/web/auth/GuestAuthFilter.scala | 2 +- .../apache/texera/web/auth/UserAuthenticator.scala | 13 +- .../http/response/SchemaPropagationResponse.scala | 2 +- .../websocket/event/PaginatedResultEvent.scala | 2 +- .../model/websocket/event/WorkflowErrorEvent.scala | 2 +- .../event/python/ConsoleUpdateEvent.scala | 2 +- .../request/EditingTimeCompilationRequest.scala | 2 +- .../websocket/request/ModifyLogicRequest.scala | 2 +- .../request/ResultPaginationRequest.scala | 5 +- .../websocket/request/WorkflowExecuteRequest.scala | 4 +- .../python/PythonExpressionEvaluateResponse.scala | 2 +- .../web/resource/CollaborationResource.scala | 2 +- .../web/resource/SystemMetadataResource.scala | 2 +- .../web/resource/WorkflowWebsocketResource.scala | 12 +- .../resource/aiassistant/AiAssistantManager.scala | 2 +- .../dashboard/DatasetSearchQueryBuilder.scala | 2 +- .../dashboard/admin/user/AdminUserResource.scala | 6 +- .../web/resource/dashboard/hub/HubResource.scala | 2 +- .../web/resource/dashboard/user/UserResource.scala | 74 + .../user/workflow/WorkflowAccessResource.scala | 46 +- .../user/workflow/WorkflowExecutionsResource.scala | 23 +- .../dashboard/user/workflow/WorkflowResource.scala | 39 +- .../user/workflow/WorkflowVersionResource.scala | 2 +- .../web/service/EmailNotificationService.scala | 2 +- .../web/service/ExecutionConsoleService.scala | 30 +- .../service/ExecutionReconfigurationService.scala | 6 +- .../web/service/ExecutionResultService.scala | 57 +- .../web/service/ExecutionRuntimeService.scala | 12 +- .../texera/web/service/ExecutionStatsService.scala | 33 +- .../service/ExecutionsMetadataPersistService.scala | 2 +- .../service/FriesReconfigurationAlgorithm.scala | 8 +- .../texera/web/service/ResultExportService.scala | 14 +- .../texera/web/service/WorkflowEmailNotifier.scala | 4 +- .../web/service/WorkflowExecutionService.scala | 16 +- .../texera/web/service/WorkflowService.scala | 32 +- .../storage/ExecutionReconfigurationStore.scala | 6 +- .../texera/web/storage/ExecutionStateStore.scala | 6 +- .../org/apache/texera/web/storage/StateStore.scala | 2 +- .../texera/web/storage/WorkflowStateStore.scala | 2 +- .../org/apache/texera/workflow/LogicalLink.scala | 4 +- .../org/apache/texera/workflow/LogicalPlan.scala | 8 +- .../apache/texera/workflow/WorkflowCompiler.scala | 6 +- .../GitVersionControlLocalFileStorageSpec.java | 4 +- .../breakpoint/ExceptionBreakpointSpec.scala | 10 +- .../architecture/control/TrivialControlSpec.scala | 26 +- .../architecture/control/utils/ChainHandler.scala | 6 +- .../control/utils/CollectHandler.scala | 6 +- .../architecture/control/utils/ErrorHandler.scala | 6 +- .../control/utils/MultiCallHandler.scala | 8 +- .../architecture/control/utils/NestedHandler.scala | 6 +- .../control/utils/PingPongHandler.scala | 6 +- .../control/utils/RecursionHandler.scala | 6 +- .../utils/TesterAsyncRPCHandlerInitializer.scala | 12 +- .../control/utils/TrivialControlTester.scala | 24 +- .../architecture/controller/ControllerSpec.scala | 10 +- .../messaginglayer/NetworkInputGatewaySpec.scala | 8 +- .../messaginglayer/OutputManagerSpec.scala | 12 +- .../messaginglayer/RangeBasedShuffleSpec.scala | 10 +- .../pythonworker/PythonWorkflowWorkerSpec.scala | 34 +- .../CostBasedScheduleGeneratorSpec.scala | 10 +- .../scheduling/DefaultCostEstimatorSpec.scala | 30 +- .../ExpansionGreedyScheduleGeneratorSpec.scala | 19 +- .../engine/architecture/worker/DPThreadSpec.scala | 31 +- .../architecture/worker/DataProcessorSpec.scala | 34 +- .../engine/architecture/worker/WorkerSpec.scala | 44 +- .../engine/e2e/BatchSizePropagationSpec.scala | 26 +- .../amber/engine/e2e/DataProcessingSpec.scala | 36 +- .../{ => texera}/amber/engine/e2e/PauseSpec.scala | 29 +- .../{ => texera}/amber/engine/e2e/TestUtils.scala | 10 +- .../engine/faulttolerance/CheckpointSpec.scala | 27 +- .../amber/engine/faulttolerance/LoggingSpec.scala | 32 +- .../amber/engine/faulttolerance/ReplaySpec.scala | 29 +- .../dashboard/file/WorkflowResourceSpec.scala | 17 + .../workflow/WorkflowVersionResourceSpec.scala | 2 +- .../web/service/ExecutionConsoleServiceSpec.scala | 7 +- .../web/service/ExecutionResultServiceSpec.scala | 2 +- .../storage/ReadonlyLocalFileDocumentSpec.scala | 6 +- bin/build-images.sh | 91 +- bin/computing-unit-master.dockerfile | 58 +- bin/computing-unit-worker.dockerfile | 66 +- bin/fix-format.sh | 8 +- bin/k8s/values.yaml | 16 +- .../r-requirements.txt => bin/litellm-config.yaml | 20 +- bin/merge-image-tags.sh | 20 +- bin/python-proto-gen.sh | 2 +- bin/single-node/.env | 11 +- bin/single-node/docker-compose.yml | 101 +- bin/single-node/nginx.conf | 12 + build.sbt | 16 +- .../scala/org/apache/texera/auth/JwtParser.scala | 2 +- common/config/src/main/resources/cluster.conf | 16 +- common/config/src/main/resources/gui.conf | 8 + .../config/src/main/resources/llm.conf | 18 +- .../{ => texera}/amber/config/AkkaConfig.scala | 4 +- .../amber/config/ApplicationConfig.scala | 2 +- .../amber/config/EnvironmentalVariable.scala | 2 +- .../{ => texera}/amber/config/StorageConfig.scala | 4 +- .../{ => texera}/amber/config/UdfConfig.scala | 2 +- .../{ => texera}/amber/util/ConfigParserUtil.scala | 2 +- .../scala/org/apache/texera/config/GuiConfig.scala | 4 + .../config/LLMConfig.scala} | 13 +- common/workflow-core/build.sbt | 30 +- .../apache/{ => texera}/amber/core/executor.proto | 4 +- .../{ => texera}/amber/core/virtualidentity.proto | 2 +- .../apache/{ => texera}/amber/core/workflow.proto | 4 +- .../amber/core/workflowruntimestate.proto | 2 +- .../amber/core/WorkflowRuntimeException.scala | 4 +- .../amber/core/executor/ExecFactory.scala | 2 +- .../core/executor/JavaRuntimeCompilation.scala | 4 +- .../amber/core/executor/OperatorExecutor.scala | 8 +- .../core/executor/SourceOperatorExecutor.scala | 6 +- .../{ => texera}/amber/core/state/State.scala | 4 +- .../amber/core/storage/DocumentFactory.scala | 28 +- .../amber/core/storage/FileResolver.scala | 2 +- .../core/storage/IcebergCatalogInstance.scala | 6 +- .../amber/core/storage/VFSURIFactory.scala | 14 +- .../core/storage/model/BufferedItemWriter.scala | 2 +- .../core/storage/model/DatasetFileDocument.scala | 10 +- .../amber/core/storage/model/OnDataset.scala | 2 +- .../storage/model/ReadonlyLocalFileDocument.scala | 8 +- .../storage/model/ReadonlyVirtualDocument.scala | 5 +- .../core/storage/model/VirtualCollection.scala | 2 +- .../amber/core/storage/model/VirtualDocument.scala | 5 +- .../amber/core/storage/result/ResultSchema.scala | 4 +- .../core/storage/result/WorkflowResultStore.scala | 4 +- .../storage/result/iceberg/IcebergDocument.scala | 33 +- .../result/iceberg/IcebergTableWriter.scala | 8 +- .../core/storage/result/iceberg/OnIceberg.scala | 4 +- .../core/storage/util/LakeFSStorageClient.scala | 67 +- .../amber/core/storage/util/StorageUtil.scala | 2 +- .../dataset/GitVersionControlLocalFileStorage.java | 2 +- .../storage/util/dataset/JGitVersionControl.java | 2 +- .../storage/util/dataset/PhysicalFileNode.java | 2 +- .../{ => texera}/amber/core/tuple/Attribute.java | 2 +- .../amber/core/tuple/AttributeType.java | 5 +- .../amber/core/tuple/AttributeTypeUtils.scala | 167 +- .../texera/amber/core/tuple/LargeBinary.java | 109 ++ .../{ => texera}/amber/core/tuple/Schema.scala | 2 +- .../{ => texera}/amber/core/tuple/Tuple.scala | 4 +- .../{ => texera}/amber/core/tuple/TupleLike.scala | 6 +- .../{ => texera}/amber/core/tuple/TupleUtils.scala | 8 +- .../amber/core/workflow/LocationPreference.scala | 2 +- .../amber/core/workflow/PartitionInfo.scala | 2 +- .../amber/core/workflow/PhysicalOp.scala | 8 +- .../amber/core/workflow/PhysicalPlan.scala | 8 +- .../amber/core/workflow/WorkflowContext.scala | 6 +- .../amber/core/workflow/WorkflowSettings.scala | 2 +- .../{ => texera}/amber/util/ArrowUtils.scala | 55 +- .../{ => texera}/amber/util/IcebergUtil.scala | 96 +- .../apache/{ => texera}/amber/util/JSONUtils.scala | 6 +- .../amber/util/VirtualIdentityUtils.scala | 4 +- .../amber/util/serde/GlobalPortIdentitySerde.scala | 6 +- .../util/serde/PortIdentityKeyDeserializer.scala | 4 +- .../util/serde/PortIdentityKeySerializer.scala | 6 +- .../service/util/LargeBinaryInputStream.scala | 82 + .../texera/service/util/LargeBinaryManager.scala | 66 + .../service/util/LargeBinaryOutputStream.scala | 121 ++ .../texera/service/util/S3StorageClient.scala | 317 ++++ .../core/storage/model/VirtualDocumentSpec.scala | 2 +- .../amber/core/tuple/AttributeTypeUtilsSpec.scala | 197 ++- .../{ => texera}/amber/core/tuple/SchemaSpec.scala | 2 +- .../{ => texera}/amber/core/tuple/TupleSpec.scala | 4 +- .../amber/storage/FileResolverSpec.scala | 4 +- .../IcebergDocumentConsoleMessagesSpec.scala | 12 +- .../result/iceberg/IcebergDocumentSpec.scala | 14 +- .../result/iceberg/IcebergTableStatsSpec.scala | 14 +- .../{ => texera}/amber/util/IcebergUtilSpec.scala | 105 +- .../service/util/LargeBinaryInputStreamSpec.scala | 352 ++++ .../service/util/LargeBinaryManagerSpec.scala | 471 ++++++ .../service/util/LargeBinaryOutputStreamSpec.scala | 252 +++ .../texera/service/util/S3StorageClientSpec.scala | 337 ++++ .../texera/service/util/S3StorageTestBase.scala | 69 + .../jackson/jsonSchema/JsonSchemaDraft.java | 19 +- .../jackson/jsonSchema/JsonSchemaGenerator.scala | 19 +- .../JsonSchemaArrayWithUniqueItems.java | 20 +- .../jsonSchema/annotations/JsonSchemaBool.java | 20 +- .../jsonSchema/annotations/JsonSchemaDefault.java | 20 +- .../annotations/JsonSchemaDescription.java | 20 +- .../jsonSchema/annotations/JsonSchemaExamples.java | 20 +- .../jsonSchema/annotations/JsonSchemaFormat.java | 20 +- .../jsonSchema/annotations/JsonSchemaInject.java | 20 +- .../jsonSchema/annotations/JsonSchemaInt.java | 20 +- .../jsonSchema/annotations/JsonSchemaOptions.java | 20 +- .../jsonSchema/annotations/JsonSchemaString.java | 20 +- .../jsonSchema/annotations/JsonSchemaTitle.java | 20 +- .../amber/operator/DummyProperties.scala | 2 +- .../{ => texera}/amber/operator/LogicalOp.scala | 205 +-- .../amber/operator/PortDescriptor.scala | 4 +- .../amber/operator/PythonOperatorDescriptor.scala | 10 +- .../amber/operator/SpecialPhysicalOpFactory.scala | 12 +- .../amber/operator/TestOperators.scala | 20 +- .../amber/operator/aggregate/AggregateOpDesc.scala | 25 +- .../amber/operator/aggregate/AggregateOpExec.scala | 8 +- .../operator/aggregate/AggregationFunction.java | 2 +- .../operator/aggregate/AggregationOperation.scala | 124 +- .../aggregate/DistributedAggregation.scala | 4 +- .../cartesianProduct/CartesianProductOpDesc.scala | 18 +- .../cartesianProduct/CartesianProductOpExec.scala | 8 +- .../dictionary/DictionaryMatcherOpDesc.scala | 25 +- .../dictionary/DictionaryMatcherOpExec.scala | 8 +- .../amber/operator/dictionary/MatchingType.java | 2 +- .../operator/difference/DifferenceOpDesc.scala | 14 +- .../operator/difference/DifferenceOpExec.scala | 6 +- .../amber/operator/distinct/DistinctOpDesc.scala | 14 +- .../amber/operator/distinct/DistinctOpExec.scala | 6 +- .../amber/operator/dummy/DummyOpDesc.scala | 8 +- .../amber/operator/filter/ComparisonType.java | 2 +- .../amber/operator/filter/FilterOpDesc.scala | 8 +- .../amber/operator/filter/FilterOpExec.scala | 6 +- .../amber/operator/filter/FilterPredicate.java | 16 +- .../operator/filter/SpecializedFilterOpDesc.scala | 14 +- .../operator/filter/SpecializedFilterOpExec.scala | 6 +- .../amber/operator/flatmap/FlatMapOpDesc.scala | 4 +- .../amber/operator/flatmap/FlatMapOpExec.scala | 6 +- .../operator/hashJoin/HashJoinBuildOpExec.scala | 8 +- .../amber/operator/hashJoin/HashJoinOpDesc.scala | 24 +- .../operator/hashJoin/HashJoinProbeOpExec.scala | 10 +- .../amber/operator/hashJoin/JoinType.java | 2 +- .../HuggingFaceIrisLogisticRegressionOpDesc.scala | 12 +- .../HuggingFaceSentimentAnalysisOpDesc.scala | 12 +- .../HuggingFaceSpamSMSDetectionOpDesc.scala | 12 +- .../HuggingFaceTextSummarizationOpDesc.scala | 12 +- .../amber/operator/ifStatement/IfOpDesc.scala | 16 +- .../amber/operator/ifStatement/IfOpExec.scala | 12 +- .../amber/operator/intersect/IntersectOpDesc.scala | 14 +- .../amber/operator/intersect/IntersectOpExec.scala | 6 +- .../operator/intervalJoin/IntervalJoinOpDesc.scala | 20 +- .../operator/intervalJoin/IntervalJoinOpExec.scala | 12 +- .../operator/intervalJoin/TimeIntervalType.java | 2 +- .../keywordSearch/KeywordSearchOpDesc.scala | 18 +- .../keywordSearch/KeywordSearchOpExec.scala | 8 +- .../amber/operator/limit/LimitOpDesc.scala | 16 +- .../amber/operator/limit/LimitOpExec.scala | 8 +- .../Scorer/MachineLearningScorerOpDesc.scala | 12 +- .../Scorer/classificationMetricsFnc.java | 2 +- .../Scorer/regressionMetricsFnc.java | 2 +- ...SklearnAdvancedKNNClassifierTrainerOpDesc.scala | 4 +- .../KNNTrainer/SklearnAdvancedKNNParameters.java | 4 +- .../SklearnAdvancedKNNRegressorTrainerOpDesc.scala | 4 +- .../SVCTrainer/SklearnAdvancedSVCParameters.java | 4 +- .../SklearnAdvancedSVCTrainerOpDesc.scala | 4 +- .../SVRTrainer/SklearnAdvancedSVRParameters.java | 4 +- .../SklearnAdvancedSVRTrainerOpDesc.scala | 4 +- .../sklearnAdvanced/base/HyperParameters.java | 6 +- .../base/SklearnAdvancedBaseDesc.scala | 12 +- .../amber/operator/map/MapOpDesc.scala | 8 +- .../amber/operator/map/MapOpExec.scala | 6 +- .../amber/operator/metadata/OPVersion.java | 2 +- .../operator/metadata/OperatorGroupConstants.scala | 2 +- .../metadata/OperatorMetadataGenerator.scala | 10 +- .../operator/metadata/PropertyNameConstants.scala | 2 +- .../annotations/AutofillAttributeName.java | 2 +- .../annotations/AutofillAttributeNameLambda.java | 2 +- .../annotations/AutofillAttributeNameList.java | 2 +- .../annotations/AutofillAttributeNameOnPort1.java | 2 +- .../metadata/annotations/BatchByColumn.java | 2 +- .../annotations/CommonOpDescAnnotation.java | 2 +- .../metadata/annotations/EnablePresets.java | 2 +- .../metadata/annotations/HideAnnotation.java | 2 +- .../operator/metadata/annotations/UIWidget.java | 2 +- .../amber/operator/projection/AttributeUnit.java | 4 +- .../operator/projection/ProjectionOpDesc.scala | 20 +- .../operator/projection/ProjectionOpExec.scala | 8 +- .../randomksampling/RandomKSamplingOpDesc.scala | 16 +- .../randomksampling/RandomKSamplingOpExec.scala | 6 +- .../amber/operator/regex/RegexOpDesc.scala | 18 +- .../amber/operator/regex/RegexOpExec.scala | 8 +- .../ReservoirSamplingOpDesc.scala | 16 +- .../ReservoirSamplingOpExec.scala | 10 +- .../amber/operator/sink/ProgressiveUtils.scala | 4 +- .../sklearn/SklearnAdaptiveBoostingOpDesc.scala | 2 +- .../operator/sklearn/SklearnBaggingOpDesc.scala | 2 +- .../sklearn/SklearnBernoulliNaiveBayesOpDesc.scala | 2 +- .../operator/sklearn/SklearnClassifierOpDesc.scala | 12 +- .../SklearnComplementNaiveBayesOpDesc.scala | 2 +- .../sklearn/SklearnDecisionTreeOpDesc.scala | 2 +- .../sklearn/SklearnDummyClassifierOpDesc.scala | 2 +- .../operator/sklearn/SklearnExtraTreeOpDesc.scala | 2 +- .../operator/sklearn/SklearnExtraTreesOpDesc.scala | 2 +- .../sklearn/SklearnGaussianNaiveBayesOpDesc.scala | 2 +- .../sklearn/SklearnGradientBoostingOpDesc.scala | 2 +- .../amber/operator/sklearn/SklearnKNNOpDesc.scala | 2 +- .../sklearn/SklearnLinearRegressionOpDesc.scala | 12 +- .../operator/sklearn/SklearnLinearSVMOpDesc.scala | 2 +- .../SklearnLogisticRegressionCVOpDesc.scala | 2 +- .../sklearn/SklearnLogisticRegressionOpDesc.scala | 2 +- .../SklearnMultiLayerPerceptronOpDesc.scala | 2 +- .../SklearnMultinomialNaiveBayesOpDesc.scala | 2 +- .../sklearn/SklearnNearestCentroidOpDesc.scala | 2 +- .../sklearn/SklearnPassiveAggressiveOpDesc.scala | 2 +- .../operator/sklearn/SklearnPerceptronOpDesc.scala | 2 +- .../operator/sklearn/SklearnPredictionOpDesc.scala | 12 +- .../SklearnProbabilityCalibrationOpDesc.scala | 2 +- .../sklearn/SklearnRandomForestOpDesc.scala | 2 +- .../operator/sklearn/SklearnRidgeCVOpDesc.scala | 2 +- .../operator/sklearn/SklearnRidgeOpDesc.scala | 2 +- .../amber/operator/sklearn/SklearnSDGOpDesc.scala | 2 +- .../amber/operator/sklearn/SklearnSVMOpDesc.scala | 2 +- .../SklearnTrainingAdaptiveBoostingOpDesc.scala | 2 +- .../training/SklearnTrainingBaggingOpDesc.scala | 2 +- .../SklearnTrainingBernoulliNaiveBayesOpDesc.scala | 2 +- ...SklearnTrainingComplementNaiveBayesOpDesc.scala | 2 +- .../SklearnTrainingDecisionTreeOpDesc.scala | 2 +- .../SklearnTrainingDummyClassifierOpDesc.scala | 2 +- .../training/SklearnTrainingExtraTreeOpDesc.scala | 2 +- .../training/SklearnTrainingExtraTreesOpDesc.scala | 2 +- .../SklearnTrainingGaussianNaiveBayesOpDesc.scala | 2 +- .../SklearnTrainingGradientBoostingOpDesc.scala | 2 +- .../training/SklearnTrainingKNNOpDesc.scala | 2 +- .../SklearnTrainingLinearRegressionOpDesc.scala | 2 +- .../training/SklearnTrainingLinearSVMOpDesc.scala | 2 +- ...SklearnTrainingLogisticRegressionCVOpDesc.scala | 2 +- .../SklearnTrainingLogisticRegressionOpDesc.scala | 2 +- ...SklearnTrainingMultiLayerPerceptronOpDesc.scala | 2 +- ...klearnTrainingMultinomialNaiveBayesOpDesc.scala | 2 +- .../SklearnTrainingNearestCentroidOpDesc.scala | 2 +- .../sklearn/training/SklearnTrainingOpDesc.scala | 12 +- .../SklearnTrainingPassiveAggressiveOpDesc.scala | 2 +- .../training/SklearnTrainingPerceptronOpDesc.scala | 2 +- ...learnTrainingProbabilityCalibrationOpDesc.scala | 2 +- .../SklearnTrainingRandomForestOpDesc.scala | 2 +- .../training/SklearnTrainingRidgeCVOpDesc.scala | 2 +- .../training/SklearnTrainingRidgeOpDesc.scala | 2 +- .../training/SklearnTrainingSDGOpDesc.scala | 2 +- .../training/SklearnTrainingSVMOpDesc.scala | 2 +- .../amber/operator/sleep/SleepOpDesc.scala | 16 +- .../amber/operator/sleep/SleepOpExec.scala | 8 +- .../amber/operator/sort/SortCriteriaUnit.java | 6 +- .../amber/operator/sort/SortOpDesc.scala | 10 +- .../amber/operator/sort/SortPreference.java | 2 +- .../operator/sort/StableMergeSortOpDesc.scala | 16 +- .../operator/sort/StableMergeSortOpExec.scala | 8 +- .../sortPartitions/SortPartitionsOpDesc.scala | 18 +- .../sortPartitions/SortPartitionsOpExec.scala | 29 +- .../amber/operator/source/BufferedBlockReader.java | 2 +- .../source/PythonSourceOperatorDescriptor.scala | 4 +- .../operator/source/SourceOperatorDescriptor.scala | 6 +- .../apis/reddit/RedditSearchSourceOpDesc.scala | 10 +- .../apis/reddit/RedditSourceOperatorFunction.java | 2 +- .../source/apis/twitter/TwitterSourceOpDesc.scala | 8 +- .../source/apis/twitter/TwitterSourceOpExec.scala | 6 +- .../v2/TwitterFullArchiveSearchSourceOpDesc.scala | 18 +- .../v2/TwitterFullArchiveSearchSourceOpExec.scala | 10 +- .../twitter/v2/TwitterSearchSourceOpDesc.scala | 18 +- .../twitter/v2/TwitterSearchSourceOpExec.scala | 10 +- .../source/apis/twitter/v2/TwitterUtils.scala | 4 +- .../operator/source/cache/CacheSourceOpExec.scala | 10 +- .../operator/source/fetcher/DecodingMethod.java | 2 +- .../operator/source/fetcher/RandomUserAgent.java | 2 +- .../operator/source/fetcher/URLFetchUtil.scala | 2 +- .../operator/source/fetcher/URLFetcherOpDesc.scala | 18 +- .../operator/source/fetcher/URLFetcherOpExec.scala | 10 +- .../operator/source/scan/AutoClosingIterator.scala | 2 +- .../operator/source/scan/FileAttributeType.java | 9 +- .../operator/source/scan/FileDecodingMethod.java | 2 +- .../source/scan/FileScanSourceOpDesc.scala | 18 +- .../source/scan/FileScanSourceOpExec.scala | 28 +- .../operator/source/scan/ScanSourceOpDesc.scala | 12 +- .../source/scan/arrow/ArrowSourceOpDesc.scala | 20 +- .../source/scan/arrow/ArrowSourceOpExec.scala | 12 +- .../source/scan/csv/CSVScanSourceOpDesc.scala | 20 +- .../source/scan/csv/CSVScanSourceOpExec.scala | 10 +- .../scan/csv/ParallelCSVScanSourceOpDesc.scala | 20 +- .../scan/csv/ParallelCSVScanSourceOpExec.scala | 12 +- .../scan/csvOld/CSVOldScanSourceOpDesc.scala | 20 +- .../scan/csvOld/CSVOldScanSourceOpExec.scala | 10 +- .../source/scan/json/JSONLScanSourceOpDesc.scala | 20 +- .../source/scan/json/JSONLScanSourceOpExec.scala | 14 +- .../amber/operator/source/scan/json/JSONUtil.scala | 2 +- .../source/scan/text/TextInputSourceOpDesc.scala | 20 +- .../source/scan/text/TextInputSourceOpExec.scala | 12 +- .../source/scan/text/TextSourceOpDesc.scala | 6 +- .../operator/source/sql/SQLSourceOpDesc.scala | 8 +- .../operator/source/sql/SQLSourceOpExec.scala | 10 +- .../source/sql/asterixdb/AsterixDBConnUtil.scala | 2 +- .../sql/asterixdb/AsterixDBSourceOpDesc.scala | 24 +- .../sql/asterixdb/AsterixDBSourceOpExec.scala | 12 +- .../operator/source/sql/mysql/MySQLConnUtil.scala | 2 +- .../source/sql/mysql/MySQLSourceOpDesc.scala | 18 +- .../source/sql/mysql/MySQLSourceOpExec.scala | 10 +- .../source/sql/postgresql/PostgreSQLConnUtil.scala | 2 +- .../sql/postgresql/PostgreSQLSourceOpDesc.scala | 20 +- .../sql/postgresql/PostgreSQLSourceOpExec.scala | 10 +- .../amber/operator/split/SplitOpDesc.scala | 18 +- .../amber/operator/split/SplitOpExec.scala | 10 +- .../substringSearch/SubstringSearchOpDesc.scala | 18 +- .../substringSearch/SubstringSearchOpExec.scala | 8 +- .../SymmetricDifferenceOpDesc.scala | 14 +- .../SymmetricDifferenceOpExec.scala | 6 +- .../operator/timeSeriesPlot/TimeSeriesPlot.scala | 14 +- .../operator/typecasting/TypeCastingOpDesc.scala | 18 +- .../operator/typecasting/TypeCastingOpExec.scala | 8 +- .../operator/typecasting/TypeCastingUnit.java | 6 +- .../amber/operator/udf/java/JavaUDFOpDesc.scala | 20 +- .../python/DualInputPortsPythonUDFOpDescV2.scala | 14 +- .../operator/udf/python/LambdaAttributeUnit.java | 8 +- .../udf/python/PythonLambdaFunctionOpDesc.scala | 10 +- .../udf/python/PythonTableReducerOpDesc.scala | 10 +- .../operator/udf/python/PythonUDFOpDescV2.scala | 14 +- .../python/source/PythonUDFSourceOpDescV2.scala | 14 +- .../amber/operator/udf/r/RUDFOpDesc.scala | 14 +- .../amber/operator/udf/r/RUDFSourceOpDesc.scala | 14 +- .../amber/operator/union/UnionOpDesc.scala | 14 +- .../amber/operator/union/UnionOpExec.scala | 6 +- .../operator/unneststring/UnnestStringOpDesc.scala | 25 +- .../operator/unneststring/UnnestStringOpExec.scala | 8 +- .../operator/util/OperatorDescriptorUtils.scala | 2 +- .../visualization/DotPlot/DotPlotOpDesc.scala | 14 +- .../IcicleChart/IcicleChartOpDesc.scala | 16 +- .../operator/visualization/ImageUtility.scala | 2 +- .../ImageViz/ImageVisualizerOpDesc.scala | 14 +- .../ScatterMatrixChartOpDesc.scala | 14 +- .../visualization/barChart/BarChartOpDesc.scala | 14 +- .../boxViolinPlot/BoxViolinPlotOpDesc.scala | 14 +- .../BoxViolinPlotQuartileFunction.java | 2 +- .../bubbleChart/BubbleChartOpDesc.scala | 14 +- .../bulletChart/BulletChartOpDesc.scala | 14 +- .../bulletChart/BulletChartStepDefinition.scala | 2 +- .../candlestickChart/CandlestickChartOpDesc.scala | 14 +- .../choroplethMap/ChoroplethMapOpDesc.scala | 14 +- .../continuousErrorBands/BandConfig.scala | 6 +- .../ContinuousErrorBandsOpDesc.scala | 12 +- .../contourPlot/ContourPlotColoringFunction.java | 2 +- .../contourPlot/ContourPlotOpDesc.scala | 14 +- .../dendrogram/DendrogramOpDesc.scala | 14 +- .../dumbbellPlot/DumbbellDotConfig.scala | 4 +- .../dumbbellPlot/DumbbellPlotOpDesc.scala | 14 +- .../FigureFactoryTableConfig.scala | 4 +- .../FigureFactoryTableOpDesc.scala | 12 +- .../filledAreaPlot/FilledAreaPlotOpDesc.scala | 14 +- .../funnelPlot/FunnelPlotOpDesc.scala | 14 +- .../ganttChart/GanttChartOpDesc.scala | 14 +- .../gaugeChart/GaugeChartOpDesc.scala | 14 +- .../visualization/gaugeChart/GaugeChartSteps.scala | 2 +- .../visualization/heatMap/HeatMapOpDesc.scala | 14 +- .../hierarchychart/HierarchyChartOpDesc.scala | 14 +- .../hierarchychart/HierarchyChartType.java | 2 +- .../hierarchychart/HierarchySection.scala | 4 +- .../histogram/HistogramChartOpDesc.scala | 14 +- .../histogram2d/Histogram2DOpDesc.scala | 14 +- .../histogram2d/NormalizationType.java | 2 +- .../visualization/htmlviz/HtmlVizOpDesc.scala | 27 +- .../visualization/htmlviz/HtmlVizOpExec.scala | 8 +- .../visualization/lineChart/LineChartOpDesc.scala | 12 +- .../visualization/lineChart/LineConfig.scala | 4 +- .../operator/visualization/lineChart/LineMode.java | 2 +- .../nestedTable/NestedTableConfig.scala | 4 +- .../nestedTable/NestedTableOpDesc.scala | 12 +- .../networkGraph/NetworkGraphOpDesc.scala | 14 +- .../visualization/pieChart/PieChartOpDesc.scala | 14 +- .../quiverPlot/QuiverPlotOpDesc.scala | 14 +- .../RangeSliderHandleDuplicateFunction.java | 2 +- .../rangeSlider/RangeSliderOpDesc.scala | 14 +- .../sankeyDiagram/SankeyDiagramOpDesc.scala | 14 +- .../scatter3DChart/Scatter3dChartOpDesc.scala | 14 +- .../scatterplot/ScatterplotOpDesc.scala | 14 +- .../stripChart/StripChartOpDesc.scala | 14 +- .../visualization/tablesChart/TablesConfig.scala | 4 +- .../tablesChart/TablesPlotOpDesc.scala | 12 +- .../ternaryPlot/TernaryPlotOpDesc.scala | 14 +- .../visualization/treeplot/TreeplotOpDesc.scala | 14 +- .../visualization/urlviz/UrlVizOpDesc.scala | 27 +- .../visualization/urlviz/UrlVizOpExec.scala | 8 +- .../volcanoPlot/VolcanoPlotOpDesc.scala | 14 +- .../waterfallChart/WaterfallChartOpDesc.scala | 14 +- .../visualization/wordCloud/WordCloudOpDesc.scala | 16 +- .../amber/util/ObjectMapperUtils.scala | 4 +- .../amber/operator/aggregate/AggregateOpSpec.scala | 503 ++++++ .../CartesianProductOpExecSpec.scala | 6 +- .../dictionary/DictionaryMatcherOpExecSpec.scala | 8 +- .../operator/difference/DifferenceOpExecSpec.scala | 4 +- .../operator/distinct/DistinctOpExecSpec.scala | 4 +- .../filter/SpecializedFilterOpExecSpec.scala | 6 +- .../amber/operator/hashJoin/HashJoinOpSpec.scala | 10 +- .../operator/intersect/IntersectOpExecSpec.scala | 8 +- .../operator/intervalJoin/IntervalOpExecSpec.scala | 10 +- .../keywordSearch/KeywordSearchOpExecSpec.scala | 6 +- .../operator/projection/ProjectionOpDescSpec.scala | 6 +- .../operator/projection/ProjectionOpExecSpec.scala | 6 +- .../operator/sort/StableMergeSortOpExecSpec.scala | 6 +- .../sortPartitions/SortPartitionsOpExecSpec.scala | 6 +- .../source/fetcher/URLFetcherOpExecSpec.scala | 6 +- .../source/scan/FileScanSourceOpExecSpec.scala | 162 ++ .../source/scan/csv/CSVScanSourceOpDescSpec.scala | 13 +- .../scan/text/FileScanSourceOpDescSpec.scala | 12 +- .../scan/text/TextInputSourceOpDescSpec.scala | 10 +- .../SymmetricDifferenceOpExecSpec.scala | 4 +- .../typecasting/TypeCastingOpExecSpec.scala | 6 +- .../python/PythonLambdaFunctionOpDescSpec.scala | 6 +- .../unneststring/UnnestStringOpExecSpec.scala | 8 +- .../visualization/DotPlot/DotPlotOpDescSpec.scala | 2 +- .../ImageViz/ImageVisualizerOpDescSpec.scala | 2 +- .../barChart/BarChartOpDescSpec.scala | 2 +- .../bubbleChart/BubbleChartOpDescSpec.scala | 2 +- .../filledAreaPlot/FilledAreaPlotOpDescSpec.scala | 2 +- .../ganttChart/GanttChartOpDescSpec.scala | 2 +- .../hierarchychart/HierarchyChartOpDescSpec.scala | 2 +- .../visualization/htmlviz/HtmlVizOpExecSpec.scala | 8 +- .../pieChart/PieChartOpDescSpec.scala | 2 +- .../scatterplot/ScatterPlotOpDescSpec.scala | 2 +- .../{ => texera}/amber/util/ArrowUtilsSpec.scala | 148 +- .../service/ComputingUnitManagingService.scala | 2 +- .../resource/ComputingUnitManagingResource.scala | 2 +- .../org/apache/texera/service/ConfigService.scala | 2 +- .../texera/service/resource/ConfigResource.scala | 2 + file-service/build.sbt | 3 - .../org/apache/texera/service/FileService.scala | 4 +- .../texera/service/resource/DatasetResource.scala | 974 +++++++++-- .../service/type/dataset/DatasetFileNode.scala | 2 +- .../texera/service/util/S3StorageClient.scala | 142 -- .../org/apache/texera/service/MockLakeFS.scala | 58 +- .../service/resource/DatasetResourceSpec.scala | 1710 +++++++++++++++++++- frontend/nx.json | 1 - frontend/package.json | 7 +- frontend/proxy.config.json | 10 + frontend/src/app/app.module.ts | 13 +- frontend/src/app/common/formly/array.type.ts | 23 +- frontend/src/app/common/formly/multischema.type.ts | 20 +- frontend/src/app/common/formly/null.type.ts | 20 +- frontend/src/app/common/formly/object.type.ts | 20 +- .../app/common/service/gui-config.service.mock.ts | 2 + .../app/common/service/user/stub-user.service.ts | 8 + .../app/common/service/user/user.service.spec.ts | 2 + .../src/app/common/service/user/user.service.ts | 37 +- .../workflow-persist/workflow-persist.service.ts | 11 +- frontend/src/app/common/type/dataset.ts | 1 + frontend/src/app/common/type/gui-config.ts | 2 + frontend/src/app/common/type/physical-plan.ts | 4 +- .../amber/engine/common/virtualidentity.ts | 2 +- .../{ => texera}/amber/engine/common/workflow.ts | 2 +- frontend/src/app/common/type/user.ts | 1 + .../util/array-utils.ts} | 27 +- .../src/app/common/util/port-identity-serde.ts | 2 +- .../component/admin/user/admin-user.component.html | 6 + .../component/admin/user/admin-user.component.ts | 56 +- .../dashboard/component/dashboard.component.html | 32 + .../app/dashboard/component/dashboard.component.ts | 69 +- .../user/share-access/share-access.component.html | 19 +- .../user/share-access/share-access.component.ts | 97 +- .../dataset-detail.component.html | 3 +- .../dataset-detail.component.ts | 253 +-- .../user-dataset-file-renderer.component.ts | 6 + .../user-dataset-version-creator.component.ts | 1 + .../user-dataset-version-filetree.component.html | 15 +- .../user-dataset-version-filetree.component.scss | 2 +- .../user-dataset-version-filetree.component.ts | 18 +- .../service/user/dataset/dataset.service.ts | 283 ++-- frontend/src/app/dashboard/type/dashboard-entry.ts | 2 + .../browse-section/browse-section.component.html | 3 +- .../browse-section/browse-section.component.ts | 26 + .../landing-page/landing-page.component.html | 5 - .../detail/hub-workflow-detail.component.ts | 6 +- .../agent-chat/agent-chat.component.html | 424 +++++ .../agent-chat/agent-chat.component.scss | 243 +++ .../agent-chat/agent-chat.component.spec.ts | 65 + .../agent-panel/agent-chat/agent-chat.component.ts | 277 ++++ .../agent-panel/agent-panel.component.html | 138 ++ .../agent-panel/agent-panel.component.scss | 164 ++ .../component/agent-panel/agent-panel.component.ts | 200 +++ .../agent-registration.component.html | 101 ++ .../agent-registration.component.scss | 147 ++ .../agent-registration.component.ts | 112 ++ .../code-editor-dialog/code-editor.component.ts | 29 +- .../app/workspace/component/menu/menu.component.ts | 18 +- .../result-panel/result-panel.component.ts | 2 +- .../result-table-frame.component.html | 34 + .../result-table-frame.component.spec.ts | 13 + .../result-table-frame.component.ts | 63 +- .../workflow-editor/workflow-editor.component.scss | 4 - .../workflow-editor/workflow-editor.component.ts | 3 +- .../workspace/component/workspace.component.html | 1 + .../app/workspace/component/workspace.component.ts | 4 + .../compile-workflow/workflow-compiling.service.ts | 11 + .../workspace/service/copilot/copilot-prompts.ts | 28 +- .../copilot/texera-copilot-manager.service.spec.ts | 259 +++ .../copilot/texera-copilot-manager.service.ts | 248 +++ .../service/copilot/texera-copilot.spec.ts | 189 +++ .../workspace/service/copilot/texera-copilot.ts | 388 +++++ .../current-workflow-editing-observing-tools.ts | 127 ++ .../copilot/tool/react-step-operator-parser.ts | 152 ++ .../service/copilot/tool/tools-utility.ts | 138 ++ .../copilot/tool/workflow-metadata-tools.ts | 140 ++ .../workspace/service/joint-ui/joint-ui.service.ts | 41 +- .../model/workflow-action.service.ts | 2 +- .../workflow-graph/util/workflow-util.service.ts | 15 +- .../workflow-result/workflow-result.service.ts | 14 +- .../types/workflow-websocket.interface.ts | 3 + frontend/src/assets/svg/hub_icon.svg | 576 ------- .../assets/svg/operator-reuse-cache-invalid.svg | 26 +- .../src/assets/svg/operator-reuse-cache-valid.svg | 28 +- frontend/src/assets/svg/operator-view-result.svg | 20 +- frontend/yarn.lock | 106 ++ licenses/LICENSE-MIT.txt | 19 + project/plugins.sbt | 3 + .../src/language-server-runner.ts | 23 +- pyright-language-service/src/main.ts | 23 +- pyright-language-service/src/server-commons.ts | 24 +- .../src/types/hocon-parser.d.ts | 2 +- sql/texera_ddl.sql | 49 + .../FlatMapOpDesc.scala => sql/updates/16.sql | 11 +- sql/updates/17.sql | 66 + .../model/OnDataset.scala => sql/updates/18.sql | 14 +- sql/updates/19.sql | 60 + .../amber/compiler/WorkflowCompiler.scala | 21 +- .../amber/compiler/model/LogicalLink.scala | 6 +- .../amber/compiler/model/LogicalPlan.scala | 12 +- .../amber/compiler/model/LogicalPlanPojo.scala | 4 +- .../texera/service/WorkflowCompilingService.scala | 4 +- .../resource/WorkflowCompilationResource.scala | 14 +- .../resource/WorkflowCompilationResourceSpec.scala | 18 +- 908 files changed, 19093 insertions(+), 6473 deletions(-) diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/DataProcessor.scala index bd43385d3e,3aa5fa90a4..dae3446826 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/DataProcessor.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/DataProcessor.scala @@@ -41,13 -41,10 +41,13 @@@ import org.apache.texera.amber.engine.a NO_ALIGNMENT, PORT_ALIGNMENT } - import org.apache.amber.engine.architecture.rpc.controlcommands._ - import org.apache.amber.engine.architecture.rpc.controlreturns.EmptyReturn - import org.apache.amber.engine.architecture.rpc.workerservice.WorkerServiceGrpc.{ + import org.apache.texera.amber.engine.architecture.rpc.controlcommands._ + import org.apache.texera.amber.engine.architecture.rpc.controlreturns.EmptyReturn -import org.apache.texera.amber.engine.architecture.rpc.workerservice.WorkerServiceGrpc.METHOD_END_CHANNEL ++import org.apache.texera.amber.engine.architecture.rpc.workerservice.WorkerServiceGrpc.{ + METHOD_END_CHANNEL, + METHOD_END_ITERATION +} - import org.apache.amber.engine.architecture.worker.WorkflowWorker.{ + import org.apache.texera.amber.engine.architecture.worker.WorkflowWorker.{ DPInputQueueElement, MainThreadDelegateMessage } diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/EndChannelHandler.scala index fc90fc174a,7794342690..43db78f6df --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/EndChannelHandler.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/EndChannelHandler.scala @@@ -17,15 -17,17 +17,18 @@@ * under the License. */ - package org.apache.amber.engine.architecture.worker.promisehandlers + package org.apache.texera.amber.engine.architecture.worker.promisehandlers import com.twitter.util.Future - import org.apache.amber.core.tuple.FinalizePort - import org.apache.amber.engine.architecture.rpc.controlcommands.{AsyncRPCContext, EmptyRequest} - import org.apache.amber.engine.architecture.rpc.controlreturns.EmptyReturn - import org.apache.amber.engine.architecture.worker.DataProcessorRPCHandlerInitializer - import org.apache.amber.error.ErrorUtils.safely - import org.apache.amber.operator.loop.LoopStartOpExec ++import org.apache.texera.amber.operator.loop.LoopStartOpExec + import org.apache.texera.amber.core.tuple.FinalizePort + import org.apache.texera.amber.engine.architecture.rpc.controlcommands.{ + AsyncRPCContext, + EmptyRequest + } + import org.apache.texera.amber.engine.architecture.rpc.controlreturns.EmptyReturn + import org.apache.texera.amber.engine.architecture.worker.DataProcessorRPCHandlerInitializer + import org.apache.texera.amber.error.ErrorUtils.safely trait EndChannelHandler { this: DataProcessorRPCHandlerInitializer => diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/EndIterationHandler.scala index def9be74ba,0000000000..def9be74ba mode 100644,000000..100644 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/EndIterationHandler.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/EndIterationHandler.scala diff --cc amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/NextIterationHandler.scala index d4f6bdffba,0000000000..d4f6bdffba mode 100644,000000..100644 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/NextIterationHandler.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/worker/promisehandlers/NextIterationHandler.scala diff --cc common/workflow-core/src/main/scala/org/apache/texera/amber/core/tuple/TupleLike.scala index 8e789fc2cc,040b94977a..c166920ad4 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/tuple/TupleLike.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/tuple/TupleLike.scala @@@ -17,10 -17,9 +17,10 @@@ * under the License. */ - package org.apache.amber.core.tuple + package org.apache.texera.amber.core.tuple - import org.apache.amber.core.virtualidentity.ActorVirtualIdentity - import org.apache.amber.core.workflow.PortIdentity ++import org.apache.texera.amber.core.virtualidentity.ActorVirtualIdentity + import org.apache.texera.amber.core.workflow.PortIdentity import scala.jdk.CollectionConverters.CollectionHasAsScala diff --cc common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala index 777e0a6088,eb319a82d1..d761341f76 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala @@@ -41,14 -48,13 +48,14 @@@ import org.apache.texera.amber.operator HuggingFaceSpamSMSDetectionOpDesc, HuggingFaceTextSummarizationOpDesc } - import org.apache.amber.operator.ifStatement.IfOpDesc - import org.apache.amber.operator.intersect.IntersectOpDesc - import org.apache.amber.operator.intervalJoin.IntervalJoinOpDesc - import org.apache.amber.operator.keywordSearch.KeywordSearchOpDesc - import org.apache.amber.operator.limit.LimitOpDesc - import org.apache.amber.operator.loop.{LoopEndOpDesc, LoopStartOpDesc} - import org.apache.amber.operator.machineLearning.Scorer.MachineLearningScorerOpDesc - import org.apache.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer.{ + import org.apache.texera.amber.operator.ifStatement.IfOpDesc ++import org.apache.texera.amber.operator.loop.{LoopEndOpDesc, LoopStartOpDesc} + import org.apache.texera.amber.operator.intersect.IntersectOpDesc + import org.apache.texera.amber.operator.intervalJoin.IntervalJoinOpDesc + import org.apache.texera.amber.operator.keywordSearch.KeywordSearchOpDesc + import org.apache.texera.amber.operator.limit.LimitOpDesc + import org.apache.texera.amber.operator.machineLearning.Scorer.MachineLearningScorerOpDesc + import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer.{ SklearnAdvancedKNNClassifierTrainerOpDesc, SklearnAdvancedKNNRegressorTrainerOpDesc }
