Yicong-Huang commented on code in PR #4712: URL: https://github.com/apache/texera/pull/4712#discussion_r3177405530
########## common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala: ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.util + +import org.apache.texera.amber.core.virtualidentity.{ + ActorVirtualIdentity, + OperatorIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class VirtualIdentityUtilsSpec extends AnyFlatSpec with Matchers { + + // ----- createWorkerIdentity ----- + + "createWorkerIdentity (raw fields)" should "format Worker:WF<id>-<op>-<layer>-<workerIdx>" in { + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + operator = "myOp", + layerName = "main", + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + "createWorkerIdentity (PhysicalOpIdentity overload)" should "delegate to the same encoded format" in { + val physicalOpId = PhysicalOpIdentity(OperatorIdentity("myOp"), "main") + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + physicalOpId, + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + // ----- getPhysicalOpId ----- + + "getPhysicalOpId" should "extract operator id and layer name from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-3") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "myOp" + opId.layerName shouldBe "main" + } + + it should "fall back to __DummyOperator/__DummyLayer for non-worker actor names" in { + val controller = ActorVirtualIdentity("CONTROLLER") + val opId = VirtualIdentityUtils.getPhysicalOpId(controller) + opId.logicalOpId.id shouldBe "__DummyOperator" + opId.layerName shouldBe "__DummyLayer" + } + + it should "tolerate operator names that contain hyphens by greedy backtracking" in { + // The operator capture group is `.+` which backtracks to leave the trailing + // `-(\w+)-(\d+)` slots populated. A multi-hyphen operator name must still + // round-trip without losing characters from the operator itself. + val actor = ActorVirtualIdentity("Worker:WF1-multi-part-op-main-0") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "multi-part-op" + opId.layerName shouldBe "main" + } + + // ----- getWorkerIndex ----- + + "getWorkerIndex" should "return the trailing numeric workerId from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-42") + VirtualIdentityUtils.getWorkerIndex(actor) shouldBe 42 + } + + // Intentionally not covered: actor names that do not match workerNamePattern + // make getWorkerIndex throw scala.MatchError because the method has no + // fallback case. See the "Potential bug" note in the PR description. Review Comment: Made self-contained in 982c67cd1c: the prose comment that referenced the PR description was replaced by an actual test (`should throw MatchError on non-worker actor names`) plus an inline explanation of why pinning the no-fallback behavior matters. ########## common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala: ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.util + +import org.apache.texera.amber.core.virtualidentity.{ + ActorVirtualIdentity, + OperatorIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class VirtualIdentityUtilsSpec extends AnyFlatSpec with Matchers { + + // ----- createWorkerIdentity ----- + + "createWorkerIdentity (raw fields)" should "format Worker:WF<id>-<op>-<layer>-<workerIdx>" in { + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + operator = "myOp", + layerName = "main", + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + "createWorkerIdentity (PhysicalOpIdentity overload)" should "delegate to the same encoded format" in { + val physicalOpId = PhysicalOpIdentity(OperatorIdentity("myOp"), "main") + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + physicalOpId, + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + // ----- getPhysicalOpId ----- + + "getPhysicalOpId" should "extract operator id and layer name from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-3") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "myOp" + opId.layerName shouldBe "main" + } + + it should "fall back to __DummyOperator/__DummyLayer for non-worker actor names" in { + val controller = ActorVirtualIdentity("CONTROLLER") + val opId = VirtualIdentityUtils.getPhysicalOpId(controller) + opId.logicalOpId.id shouldBe "__DummyOperator" + opId.layerName shouldBe "__DummyLayer" + } + + it should "tolerate operator names that contain hyphens by greedy backtracking" in { + // The operator capture group is `.+` which backtracks to leave the trailing + // `-(\w+)-(\d+)` slots populated. A multi-hyphen operator name must still + // round-trip without losing characters from the operator itself. + val actor = ActorVirtualIdentity("Worker:WF1-multi-part-op-main-0") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "multi-part-op" + opId.layerName shouldBe "main" + } + Review Comment: Added `should misparse layer names that contain hyphens (current behavior)` in 982c67cd1c. The test feeds `Worker:WF1-myOp-1st-physical-op-3` to `getPhysicalOpId` and asserts the current misparse (operator="myOp-1st-physical", layer="op"), so a future fix that broadens `workerNamePattern` will visibly need to update this spec. ########## common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala: ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.util + +import org.apache.texera.amber.core.virtualidentity.{ + ActorVirtualIdentity, + OperatorIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class VirtualIdentityUtilsSpec extends AnyFlatSpec with Matchers { + + // ----- createWorkerIdentity ----- + + "createWorkerIdentity (raw fields)" should "format Worker:WF<id>-<op>-<layer>-<workerIdx>" in { + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + operator = "myOp", + layerName = "main", + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + "createWorkerIdentity (PhysicalOpIdentity overload)" should "delegate to the same encoded format" in { + val physicalOpId = PhysicalOpIdentity(OperatorIdentity("myOp"), "main") + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + physicalOpId, + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + // ----- getPhysicalOpId ----- + + "getPhysicalOpId" should "extract operator id and layer name from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-3") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "myOp" + opId.layerName shouldBe "main" + } + + it should "fall back to __DummyOperator/__DummyLayer for non-worker actor names" in { + val controller = ActorVirtualIdentity("CONTROLLER") + val opId = VirtualIdentityUtils.getPhysicalOpId(controller) + opId.logicalOpId.id shouldBe "__DummyOperator" + opId.layerName shouldBe "__DummyLayer" + } + + it should "tolerate operator names that contain hyphens by greedy backtracking" in { + // The operator capture group is `.+` which backtracks to leave the trailing + // `-(\w+)-(\d+)` slots populated. A multi-hyphen operator name must still + // round-trip without losing characters from the operator itself. + val actor = ActorVirtualIdentity("Worker:WF1-multi-part-op-main-0") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "multi-part-op" + opId.layerName shouldBe "main" + } + + // ----- getWorkerIndex ----- + + "getWorkerIndex" should "return the trailing numeric workerId from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-42") + VirtualIdentityUtils.getWorkerIndex(actor) shouldBe 42 + } + + // Intentionally not covered: actor names that do not match workerNamePattern + // make getWorkerIndex throw scala.MatchError because the method has no + // fallback case. See the "Potential bug" note in the PR description. + Review Comment: Added `should throw MatchError on non-worker actor names (current behavior)` in 982c67cd1c, using `assertThrows[scala.MatchError]` against `ActorVirtualIdentity("CONTROLLER")`. The `getWorkerIndex` no-fallback contract is now pinned. ########## common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala: ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.util + +import org.apache.texera.amber.core.virtualidentity.{ + ActorVirtualIdentity, + OperatorIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class VirtualIdentityUtilsSpec extends AnyFlatSpec with Matchers { + + // ----- createWorkerIdentity ----- + + "createWorkerIdentity (raw fields)" should "format Worker:WF<id>-<op>-<layer>-<workerIdx>" in { + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + operator = "myOp", + layerName = "main", + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + "createWorkerIdentity (PhysicalOpIdentity overload)" should "delegate to the same encoded format" in { + val physicalOpId = PhysicalOpIdentity(OperatorIdentity("myOp"), "main") + val actor = VirtualIdentityUtils.createWorkerIdentity( + WorkflowIdentity(7), + physicalOpId, + workerId = 3 + ) + actor.name shouldBe "Worker:WF7-myOp-main-3" + } + + // ----- getPhysicalOpId ----- + + "getPhysicalOpId" should "extract operator id and layer name from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-3") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "myOp" + opId.layerName shouldBe "main" + } + + it should "fall back to __DummyOperator/__DummyLayer for non-worker actor names" in { + val controller = ActorVirtualIdentity("CONTROLLER") + val opId = VirtualIdentityUtils.getPhysicalOpId(controller) + opId.logicalOpId.id shouldBe "__DummyOperator" + opId.layerName shouldBe "__DummyLayer" + } + + it should "tolerate operator names that contain hyphens by greedy backtracking" in { + // The operator capture group is `.+` which backtracks to leave the trailing + // `-(\w+)-(\d+)` slots populated. A multi-hyphen operator name must still + // round-trip without losing characters from the operator itself. + val actor = ActorVirtualIdentity("Worker:WF1-multi-part-op-main-0") + val opId = VirtualIdentityUtils.getPhysicalOpId(actor) + opId.logicalOpId.id shouldBe "multi-part-op" + opId.layerName shouldBe "main" + } + + // ----- getWorkerIndex ----- + + "getWorkerIndex" should "return the trailing numeric workerId from a worker actor name" in { + val actor = ActorVirtualIdentity("Worker:WF7-myOp-main-42") + VirtualIdentityUtils.getWorkerIndex(actor) shouldBe 42 + } + + // Intentionally not covered: actor names that do not match workerNamePattern + // make getWorkerIndex throw scala.MatchError because the method has no + // fallback case. See the "Potential bug" note in the PR description. + + // ----- toShorterString ----- + + "toShorterString" should "keep operator names <= 6 chars unchanged" in { + val actor = ActorVirtualIdentity("Worker:WF1-myOp-main-0") + VirtualIdentityUtils.toShorterString(actor) shouldBe "WF1-myOp-main-0" Review Comment: Added a `<= 6 chars` boundary case in 982c67cd1c using a six-character operator (`sixSix`). A regression from `length > 6` to `>= 6` would shorten it and fail the spec. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
