This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-5768-0eb7baa6e5bb9b4b27cb6e0d7d03704e0c3a6786 in repository https://gitbox.apache.org/repos/asf/texera.git
commit eba14a67f15837ab4803d656e5948c28357dc9c6 Author: Xinyuan Lin <[email protected]> AuthorDate: Fri Jun 19 18:24:04 2026 -0700 test(workflow-operator): add unit test coverage for SklearnAdvanced trainer descriptors (#5768) ### What changes were proposed in this PR? Pin behavior of four previously-uncovered sklearn-trainer descriptors in `common/workflow-operator/operator/machineLearning/sklearnAdvanced/`. Each is a 30-line subclass of `SklearnMLOperatorDescriptor` that contributes just two values: the Python `import` statement and the operator-info label. Drift in either silently breaks generated Python code or the UI label. No production-code changes. | Spec | Source class | Tests | | --- | --- | --- | | `SklearnAdvancedKNNClassifierTrainerOpDescSpec` | `SklearnAdvancedKNNClassifierTrainerOpDesc` | 5 | | `SklearnAdvancedKNNRegressorTrainerOpDescSpec` | `SklearnAdvancedKNNRegressorTrainerOpDesc` | 6 | | `SklearnAdvancedSVCTrainerOpDescSpec` | `SklearnAdvancedSVCTrainerOpDesc` | 5 | | `SklearnAdvancedSVRTrainerOpDescSpec` | `SklearnAdvancedSVRTrainerOpDesc` | 6 | All four spec files follow the `<srcClassName>Spec.scala` one-to-one convention. 
**Behavior pinned (per descriptor)** | Surface | Contract | | --- | --- | | `getImportStatements` | exact canonical Python import (`KNeighborsClassifier` / `KNeighborsRegressor` / `SVC` / `SVR` from the appropriate sklearn module) | | `getOperatorInfo` | exact canonical label (`"KNN Classifier"` / `"KNN Regressor"` / `"SVM Classifier"` / `"SVM Regressor"`) | | Stability across two instances | both methods return the same string regardless of which instance is queried | | Type assignability | extends `SklearnMLOperatorDescriptor[ParamsT]` (compile-time enforced through a typed `val` binding) | | Type-pattern matching | `case _: SklearnMLOperatorDescriptor[_]` matches a concrete instance | The Regressor spec additionally cross-checks against the Classifier sibling (and SVR vs SVC) — catches copy-paste regressions where one subclass accidentally returned the other's strings. ### Any related issues, documentation, discussions? Closes #5765. ### How was this PR tested? Pure unit-test additions; verified locally with: - `sbt "WorkflowOperator/testOnly org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer.SklearnAdvancedKNNClassifierTrainerOpDescSpec org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer.SklearnAdvancedKNNRegressorTrainerOpDescSpec org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.SVCTrainer.SklearnAdvancedSVCTrainerOpDescSpec org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.SVRTrainer.SklearnAdvancedSVRTrainerOpDescSpec"` — 22 tests, all green - `sbt scalafmtCheckAll` — clean - CI to confirm ### Was this PR authored or co-authored using generative AI tooling? 
Generated-by: Claude Code (Opus 4.7 [1M context]) --- ...arnAdvancedKNNClassifierTrainerOpDescSpec.scala | 61 +++++++++++++++++++ ...earnAdvancedKNNRegressorTrainerOpDescSpec.scala | 69 ++++++++++++++++++++++ .../SklearnAdvancedSVCTrainerOpDescSpec.scala | 60 +++++++++++++++++++ .../SklearnAdvancedSVRTrainerOpDescSpec.scala | 68 +++++++++++++++++++++ 4 files changed, 258 insertions(+) diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/KNNTrainer/SklearnAdvancedKNNClassifierTrainerOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/KNNTrainer/SklearnAdvancedKNNClassifierTrainerOpDescSpec.scala new file mode 100644 index 0000000000..7c62b0cf2c --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/KNNTrainer/SklearnAdvancedKNNClassifierTrainerOpDescSpec.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer + +import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base.SklearnMLOperatorDescriptor +import org.scalatest.flatspec.AnyFlatSpec + +class SklearnAdvancedKNNClassifierTrainerOpDescSpec extends AnyFlatSpec { + + "SklearnAdvancedKNNClassifierTrainerOpDesc.getImportStatements" should + "return the canonical KNeighborsClassifier import" in { + val d = new SklearnAdvancedKNNClassifierTrainerOpDesc + assert(d.getImportStatements == "from sklearn.neighbors import KNeighborsClassifier") + } + + "SklearnAdvancedKNNClassifierTrainerOpDesc.getOperatorInfo" should + "return 'KNN Classifier'" in { + val d = new SklearnAdvancedKNNClassifierTrainerOpDesc + assert(d.getOperatorInfo == "KNN Classifier") + } + + it should "be stable across two instances (no instance-state interaction)" in { + val a = new SklearnAdvancedKNNClassifierTrainerOpDesc + val b = new SklearnAdvancedKNNClassifierTrainerOpDesc + assert(a.getImportStatements == b.getImportStatements) + assert(a.getOperatorInfo == b.getOperatorInfo) + } + + "SklearnAdvancedKNNClassifierTrainerOpDesc" should + "extend SklearnMLOperatorDescriptor (compile-time enforced)" in { + val d: SklearnMLOperatorDescriptor[SklearnAdvancedKNNParameters] = + new SklearnAdvancedKNNClassifierTrainerOpDesc + assert(d.getImportStatements.contains("KNeighborsClassifier")) + } + + it should "be matchable via the SklearnMLOperatorDescriptor type-pattern" in { + val any: AnyRef = new SklearnAdvancedKNNClassifierTrainerOpDesc + val matched = any match { + case _: SklearnMLOperatorDescriptor[_] => true + case _ => false + } + assert(matched) + } +} diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/KNNTrainer/SklearnAdvancedKNNRegressorTrainerOpDescSpec.scala 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/KNNTrainer/SklearnAdvancedKNNRegressorTrainerOpDescSpec.scala new file mode 100644 index 0000000000..0715f120c2 --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/KNNTrainer/SklearnAdvancedKNNRegressorTrainerOpDescSpec.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer + +import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base.SklearnMLOperatorDescriptor +import org.scalatest.flatspec.AnyFlatSpec + +class SklearnAdvancedKNNRegressorTrainerOpDescSpec extends AnyFlatSpec { + + "SklearnAdvancedKNNRegressorTrainerOpDesc.getImportStatements" should + "return the canonical KNeighborsRegressor import" in { + val d = new SklearnAdvancedKNNRegressorTrainerOpDesc + assert(d.getImportStatements == "from sklearn.neighbors import KNeighborsRegressor") + } + + "SklearnAdvancedKNNRegressorTrainerOpDesc.getOperatorInfo" should "return 'KNN Regressor'" in { + val d = new SklearnAdvancedKNNRegressorTrainerOpDesc + assert(d.getOperatorInfo == "KNN Regressor") + } + + it should "be stable across two instances (no instance-state interaction)" in { + val a = new SklearnAdvancedKNNRegressorTrainerOpDesc + val b = new SklearnAdvancedKNNRegressorTrainerOpDesc + assert(a.getImportStatements == b.getImportStatements) + assert(a.getOperatorInfo == b.getOperatorInfo) + } + + "SklearnAdvancedKNNRegressorTrainerOpDesc" should + "extend SklearnMLOperatorDescriptor (compile-time enforced)" in { + val d: SklearnMLOperatorDescriptor[SklearnAdvancedKNNParameters] = + new SklearnAdvancedKNNRegressorTrainerOpDesc + assert(d.getImportStatements.contains("KNeighborsRegressor")) + } + + it should "be matchable via the SklearnMLOperatorDescriptor type-pattern" in { + val any: AnyRef = new SklearnAdvancedKNNRegressorTrainerOpDesc + val matched = any match { + case _: SklearnMLOperatorDescriptor[_] => true + case _ => false + } + assert(matched) + } + + it should "differ from the Classifier sibling on both methods" in { + // Catches a copy-paste regression where the Regressor accidentally + // returned the Classifier's strings (or vice-versa). 
+ val regressor = new SklearnAdvancedKNNRegressorTrainerOpDesc + val classifier = new SklearnAdvancedKNNClassifierTrainerOpDesc + assert(regressor.getImportStatements != classifier.getImportStatements) + assert(regressor.getOperatorInfo != classifier.getOperatorInfo) + } +} diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/SVCTrainer/SklearnAdvancedSVCTrainerOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/SVCTrainer/SklearnAdvancedSVCTrainerOpDescSpec.scala new file mode 100644 index 0000000000..c554dae6db --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/SVCTrainer/SklearnAdvancedSVCTrainerOpDescSpec.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.SVCTrainer + +import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base.SklearnMLOperatorDescriptor +import org.scalatest.flatspec.AnyFlatSpec + +class SklearnAdvancedSVCTrainerOpDescSpec extends AnyFlatSpec { + + "SklearnAdvancedSVCTrainerOpDesc.getImportStatements" should + "return the canonical SVC import (from sklearn.svm)" in { + val d = new SklearnAdvancedSVCTrainerOpDesc + assert(d.getImportStatements == "from sklearn.svm import SVC") + } + + "SklearnAdvancedSVCTrainerOpDesc.getOperatorInfo" should "return 'SVM Classifier'" in { + val d = new SklearnAdvancedSVCTrainerOpDesc + assert(d.getOperatorInfo == "SVM Classifier") + } + + it should "be stable across two instances (no instance-state interaction)" in { + val a = new SklearnAdvancedSVCTrainerOpDesc + val b = new SklearnAdvancedSVCTrainerOpDesc + assert(a.getImportStatements == b.getImportStatements) + assert(a.getOperatorInfo == b.getOperatorInfo) + } + + "SklearnAdvancedSVCTrainerOpDesc" should + "extend SklearnMLOperatorDescriptor (compile-time enforced)" in { + val d: SklearnMLOperatorDescriptor[SklearnAdvancedSVCParameters] = + new SklearnAdvancedSVCTrainerOpDesc + assert(d.getImportStatements.contains("SVC")) + } + + it should "be matchable via the SklearnMLOperatorDescriptor type-pattern" in { + val any: AnyRef = new SklearnAdvancedSVCTrainerOpDesc + val matched = any match { + case _: SklearnMLOperatorDescriptor[_] => true + case _ => false + } + assert(matched) + } +} diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/SVRTrainer/SklearnAdvancedSVRTrainerOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/SVRTrainer/SklearnAdvancedSVRTrainerOpDescSpec.scala new file mode 100644 index 0000000000..80e7c87cc0 --- /dev/null +++ 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/SVRTrainer/SklearnAdvancedSVRTrainerOpDescSpec.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.SVRTrainer + +import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.SVCTrainer.SklearnAdvancedSVCTrainerOpDesc +import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base.SklearnMLOperatorDescriptor +import org.scalatest.flatspec.AnyFlatSpec + +class SklearnAdvancedSVRTrainerOpDescSpec extends AnyFlatSpec { + + "SklearnAdvancedSVRTrainerOpDesc.getImportStatements" should + "return the canonical SVR import (from sklearn.svm)" in { + val d = new SklearnAdvancedSVRTrainerOpDesc + assert(d.getImportStatements == "from sklearn.svm import SVR") + } + + "SklearnAdvancedSVRTrainerOpDesc.getOperatorInfo" should "return 'SVM Regressor'" in { + val d = new SklearnAdvancedSVRTrainerOpDesc + assert(d.getOperatorInfo == "SVM Regressor") + } + + it should "be stable across two instances (no instance-state interaction)" in { + val a = new SklearnAdvancedSVRTrainerOpDesc + val b = new 
SklearnAdvancedSVRTrainerOpDesc + assert(a.getImportStatements == b.getImportStatements) + assert(a.getOperatorInfo == b.getOperatorInfo) + } + + "SklearnAdvancedSVRTrainerOpDesc" should + "extend SklearnMLOperatorDescriptor (compile-time enforced)" in { + val d: SklearnMLOperatorDescriptor[SklearnAdvancedSVRParameters] = + new SklearnAdvancedSVRTrainerOpDesc + assert(d.getImportStatements.contains("SVR")) + } + + it should "be matchable via the SklearnMLOperatorDescriptor type-pattern" in { + val any: AnyRef = new SklearnAdvancedSVRTrainerOpDesc + val matched = any match { + case _: SklearnMLOperatorDescriptor[_] => true + case _ => false + } + assert(matched) + } + + it should "differ from the SVC sibling on both methods" in { + val regressor = new SklearnAdvancedSVRTrainerOpDesc + val classifier = new SklearnAdvancedSVCTrainerOpDesc + assert(regressor.getImportStatements != classifier.getImportStatements) + assert(regressor.getOperatorInfo != classifier.getOperatorInfo) + } +}
