This is an automated email from the ASF dual-hosted git repository.

aglinxinyuan pushed a commit to branch xinyuan-loop-feb
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/xinyuan-loop-feb by this push:
     new a374596844 test(loop): pin that envelope metadata never leaks into user-facing State
a374596844 is described below

commit a3745968444e03bff55c4d561720c69e1446f059
Author: Xinyuan Lin <[email protected]>
AuthorDate: Thu Jun 4 00:36:52 2026 -0700

    test(loop): pin that envelope metadata never leaks into user-facing State
    
    Addresses PR #4206 review feedback
    (https://github.com/apache/texera/pull/4206#discussion_r3285892237).
    
    The substantive concern -- LoopStartStateURI being written into the
    state object that flows through every operator including user UDFs --
    was closed by 30ba48c39f, which moved loop_counter / LoopStartId /
    LoopStartStateURI onto the typed StateFrame envelope. User-facing
    process_state callbacks only ever receive frame.frame (the inner
    State), never the envelope's scalar fields.
    
    This commit pins that invariant end to end: a StateFrame arriving on
    the consume branch (loop_counter == 0 at a LoopEnd) with non-empty
    envelope metadata must yield a current_input_state whose keys are
    exactly the inner State's -- no LoopStartId, LoopStartStateURI, or
    loop_counter as string keys. If a future refactor accidentally merges
    envelope fields into the inner state, the test breaks.
---
 .../test/python/core/runnables/test_main_loop.py   | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/amber/src/test/python/core/runnables/test_main_loop.py b/amber/src/test/python/core/runnables/test_main_loop.py
index d98a01d659..feed10de33 100644
--- a/amber/src/test/python/core/runnables/test_main_loop.py
+++ b/amber/src/test/python/core/runnables/test_main_loop.py
@@ -1902,6 +1902,60 @@ class TestMainLoop:
         assert switched == [True], "consume branch must invoke the operator"
         assert emitted == [], "operator returned None -> nothing emitted"
 
+    def test_user_state_excludes_envelope_metadata_on_consume_branch(
+        self, main_loop, monkeypatch
+    ):
+        # Reviewer feedback (#discussion_r3285892237): the envelope's
+        # loop metadata (loop_counter / loop_start_id /
+        # loop_start_state_uri) is internal runtime data and user code
+        # must not see it. After commit 30ba48c39f the URI is a typed
+        # StateFrame field, no longer a key in user State. This test
+        # pins that end to end: a StateFrame whose envelope carries the
+        # metadata must yield a user-facing `current_input_state` that
+        # contains only the inner State's keys -- never the envelope
+        # names.
+        class StubLoopEnd(LoopEndOperator):
+            def condition(self):
+                return False
+
+        main_loop.context.executor_manager.executor = StubLoopEnd()
+        # Standard stubs: emit/save/switch don't fire real work. The
+        # consume branch sets `current_input_state` to the inner State
+        # BEFORE the (stubbed) context switch, so the assertion below
+        # captures exactly what the operator would have received.
+        self._capture_state_emit(main_loop, monkeypatch)
+        # No output from the operator -> no emit work after consume.
+        monkeypatch.setattr(
+            main_loop.context.state_processing_manager,
+            "get_output_state",
+            lambda: None,
+        )
+
+        inner_state = State({"i": 42, "acc": [1, 2, 3]})
+        main_loop._process_state_frame(
+            StateFrame(
+                inner_state,
+                loop_counter=0,
+                loop_start_id="outer-loop",
+                loop_start_state_uri="vfs:///wf/state/outer",
+            )
+        )
+
+        # The runtime captured the envelope metadata onto its own
+        # instance state...
+        assert main_loop._loop_start_id == "outer-loop"
+        assert main_loop._loop_start_state_uri == "vfs:///wf/state/outer"
+        # ...but never wrote it into the user-facing State that the
+        # operator's process_state receives.
+        passed_to_operator = (
+            main_loop.context.state_processing_manager.current_input_state
+        )
+        assert isinstance(passed_to_operator, State)
+        assert set(passed_to_operator.keys()) == {"i", "acc"}
+        assert "LoopStartId" not in passed_to_operator
+        assert "LoopStartStateURI" not in passed_to_operator
+        assert "loop_counter" not in passed_to_operator
+
     # ------------------------------------------------------------------ #
     # _compute_loop_start_id / _jump_to_loop_start
     #

Reply via email to