aglinxinyuan commented on code in PR #4827: URL: https://github.com/apache/texera/pull/4827#discussion_r3177853914
########## common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnOpDescRegistrySpec.scala: ########## @@ -0,0 +1,339 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.sklearn + +import org.apache.texera.amber.operator.sklearn.training._ +import org.scalatest.flatspec.AnyFlatSpec + +/** + * Pins the wiring (Python import statement + user-friendly model name) for + * every concrete `SklearnClassifierOpDesc` and `SklearnTrainingOpDesc`. A + * typo in either string would silently misroute downstream UI labels and + * breakage of the generated Python pipeline. + */ +class SklearnOpDescRegistrySpec extends AnyFlatSpec { + + // --------------------------------------------------------------------------- + // Classifier registry (24 concrete SklearnClassifierOpDesc subclasses) + // --------------------------------------------------------------------------- + + private val classifierEntries: List[(SklearnClassifierOpDesc, String, String)] = List( + ( + new SklearnAdaptiveBoostingOpDesc(), + "from sklearn.ensemble import AdaBoostClassifier", + "Adaptive Boosting" + ), + (new SklearnBaggingOpDesc(), "from sklearn.ensemble import BaggingClassifier", "Bagging"), + ( + new SklearnBernoulliNaiveBayesOpDesc(), Review Comment: Done in d37d1ae5e8 — added `SklearnDummyClassifierOpDesc` to `classifierEntries` and updated the comment to reflect 25 subclasses. ########## common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnOpDescRegistrySpec.scala: ########## @@ -0,0 +1,339 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.sklearn + +import org.apache.texera.amber.operator.sklearn.training._ +import org.scalatest.flatspec.AnyFlatSpec + +/** + * Pins the wiring (Python import statement + user-friendly model name) for + * every concrete `SklearnClassifierOpDesc` and `SklearnTrainingOpDesc`. A + * typo in either string would silently misroute downstream UI labels and + * breakage of the generated Python pipeline. + */ +class SklearnOpDescRegistrySpec extends AnyFlatSpec { + + // --------------------------------------------------------------------------- + // Classifier registry (24 concrete SklearnClassifierOpDesc subclasses) + // --------------------------------------------------------------------------- + + private val classifierEntries: List[(SklearnClassifierOpDesc, String, String)] = List( + ( + new SklearnAdaptiveBoostingOpDesc(), + "from sklearn.ensemble import AdaBoostClassifier", + "Adaptive Boosting" + ), + (new SklearnBaggingOpDesc(), "from sklearn.ensemble import BaggingClassifier", "Bagging"), + ( + new SklearnBernoulliNaiveBayesOpDesc(), + "from sklearn.naive_bayes import BernoulliNB", + "Bernoulli Naive Bayes" + ), + ( + new SklearnComplementNaiveBayesOpDesc(), + "from sklearn.naive_bayes import ComplementNB", + "Complement Naive Bayes" + ), + ( + new SklearnDecisionTreeOpDesc(), + "from sklearn.tree import DecisionTreeClassifier", + "Decision Tree" + ), + (new SklearnExtraTreeOpDesc(), "from sklearn.tree import ExtraTreeClassifier", "Extra Tree"), + ( + new SklearnExtraTreesOpDesc(), + "from sklearn.ensemble import ExtraTreesClassifier", + "Extra Trees" + ), + ( + new SklearnGaussianNaiveBayesOpDesc(), + "from sklearn.naive_bayes import GaussianNB", + "Gaussian Naive Bayes" + ), + ( + new SklearnGradientBoostingOpDesc(), + "from sklearn.ensemble import GradientBoostingClassifier", + "Gradient Boosting" + ), + ( + new SklearnKNNOpDesc(), + "from sklearn.neighbors import KNeighborsClassifier", + "K-nearest Neighbors" + ), + ( + new SklearnLinearSVMOpDesc(), + "from sklearn.svm import LinearSVC", + "Linear Support Vector Machine" + ), + ( + new SklearnLogisticRegressionCVOpDesc(), + "from sklearn.linear_model import LogisticRegressionCV", + "Logistic Regression Cross Validation" + ), + ( + new SklearnLogisticRegressionOpDesc(), + "from sklearn.linear_model import LogisticRegression", + "Logistic Regression" + ), + ( + new SklearnMultiLayerPerceptronOpDesc(), + "from sklearn.neural_network import MLPClassifier", + "Multi-layer Perceptron" + ), + ( + new SklearnMultinomialNaiveBayesOpDesc(), + "from sklearn.naive_bayes import MultinomialNB", + "Multinomial Naive Bayes" + ), + ( + new SklearnNearestCentroidOpDesc(), + "from sklearn.neighbors import NearestCentroid", + "Nearest Centroid" + ), + ( + new SklearnPassiveAggressiveOpDesc(), + "from sklearn.linear_model import PassiveAggressiveClassifier", + "Passive Aggressive" + ), + ( + new SklearnPerceptronOpDesc(), + "from sklearn.linear_model import Perceptron", + "Linear Perceptron" + ), + ( + new SklearnProbabilityCalibrationOpDesc(), + "from sklearn.calibration import CalibratedClassifierCV", + "Probability Calibration" + ), + ( + new SklearnRandomForestOpDesc(), + "from sklearn.ensemble import RandomForestClassifier", + "Random Forest" + ), + ( + new SklearnRidgeCVOpDesc(), + "from sklearn.linear_model import RidgeClassifierCV", + "Ridge Regression Cross Validation" + ), + ( + new SklearnRidgeOpDesc(), + "from sklearn.linear_model import RidgeClassifier", + "Ridge Regression" + ), + ( + new SklearnSDGOpDesc(), + "from sklearn.linear_model import SGDClassifier", + "Stochastic Gradient Descent" + ), + (new SklearnSVMOpDesc(), "from sklearn.svm import SVC", "Support Vector Machine") + ) + + classifierEntries.foreach { + case (desc, expectedImport, expectedName) => + val cls = desc.getClass.getSimpleName + cls should s"return import statement '$expectedImport'" in { + assert(desc.getImportStatements == expectedImport) + } + it should s"return user-friendly model name '$expectedName'" in { + assert(desc.getUserFriendlyModelName == expectedName) + } + } + + "SklearnClassifierOpDesc base class" should "default to empty strings before subclass overrides" in { + val anonymous = new SklearnClassifierOpDesc {} + assert(anonymous.getImportStatements == "") + assert(anonymous.getUserFriendlyModelName == "") + } + + it should "embed the import statement into generatePythonCode for a concrete subclass" in { + val desc = new SklearnLogisticRegressionOpDesc() + desc.target = "y" + desc.countVectorizer = false + // `tfidfTransformer` is a val on the base class, defaults to false. + val code = desc.generatePythonCode() + assert(code.contains("from sklearn.linear_model import LogisticRegression")) + // Classifier OpDescs emit a UDFTableOperator pipeline. + assert(code.contains("ProcessTableOperator")) + } + + // --------------------------------------------------------------------------- + // Training registry (26 concrete SklearnTrainingOpDesc subclasses) + // --------------------------------------------------------------------------- + + private val trainingEntries: List[(SklearnTrainingOpDesc, String, String)] = List( + ( + new SklearnTrainingAdaptiveBoostingOpDesc(), + "from sklearn.ensemble import AdaBoostClassifier", + "Training: Adaptive Boosting" + ), + ( + new SklearnTrainingBaggingOpDesc(), + "from sklearn.ensemble import BaggingClassifier", + "Training: Bagging Training" // current source value (typo-style duplication preserved) + ), + ( + new SklearnTrainingBernoulliNaiveBayesOpDesc(), + "from sklearn.naive_bayes import BernoulliNB", + "Training: Bernoulli Naive Bayes" + ), + ( + new SklearnTrainingComplementNaiveBayesOpDesc(), + "from sklearn.naive_bayes import ComplementNB", + "Training: Complement Naive Bayes" + ), + ( + new SklearnTrainingDecisionTreeOpDesc(), + "from sklearn.tree import DecisionTreeClassifier", + "Training: Decision Tree" + ), + ( + new SklearnTrainingDummyClassifierOpDesc(), + "from sklearn.dummy import dummy", // current source value (typo preserved — should be DummyClassifier) Review Comment: Done in d37d1ae5e8 — corrected both `SklearnDummyClassifierOpDesc` and `SklearnTrainingDummyClassifierOpDesc` to import `DummyClassifier` (the actual sklearn symbol) and updated the spec expectations accordingly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
