This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-5923-e55d1abb07125d6fee698821818cc7785f7c3120 in repository https://gitbox.apache.org/repos/asf/texera.git
commit 0edb26b840628f022ce669cc9641c4449868f92b
Author: Xinyuan Lin <[email protected]>
AuthorDate: Wed Jun 24 13:28:55 2026 -0700

test(workflow-operator): add unit test coverage for SQL source descriptors (MySQL, PostgreSQL, AsterixDB) (#5923)

### What changes were proposed in this PR?

Pin behavior of three previously untested SQL source descriptors in `common/workflow-operator`, plus a one-word grammar fix surfaced by review.

| Spec | Source class | Tests |
| --- | --- | --- |
| `MySQLSourceOpDescSpec` | `MySQLSourceOpDesc` | 5 |
| `PostgreSQLSourceOpDescSpec` | `PostgreSQLSourceOpDesc` | 5 |
| `AsterixDBSourceOpDescSpec` | `AsterixDBSourceOpDesc` | 5 |

**Production fix (`AsterixDBSourceOpDesc`)**

Corrected the user-facing `operatorInfo` description grammar: `"Read data from a AsterixDB instance"` → `"Read data from an AsterixDB instance"` (per review — avoids pinning the typo as contract). Assertion updated to match.

**Behavior pinned**

| Surface | Contract |
| --- | --- |
| `operatorInfo` | exact name + description; Database Connector group; 0 inputs / 1 output |
| field defaults | runtime defaults (host/port/etc. `null`, `limit`/`offset` `None`, `keywordSearch`/`progressive` `Some(false)`, `interval` `0L`); AsterixDB geo/regex/filter toggles default `Some(false)` with empty lists |
| `sourceSchema()` | `null` before a connection is configured (IO-free short-circuit) |
| `getPhysicalOp` | wires `OpExecWithClassName` for the matching `*OpExec`, no input port / one output port, IO-free (lazy schema closure) |
| Round-trip | config fields preserved through the polymorphic `LogicalOp` base. **Credential handling is now explicit:** MySQL/PostgreSQL persist `username`/`password` in plaintext, while AsterixDB drops them via `@JsonIgnoreProperties` — the specs assert both behaviors. |

Note: `MySQLSourceOpDesc` is `@deprecated` (no longer executable) but retained so legacy workflows still deserialize; the spec is annotated `@nowarn("cat=deprecation")`.
The MySQL/PostgreSQL plaintext-credential persistence (vs. AsterixDB dropping them) is a real asymmetry flagged in review; it's now made visible by the tests and tracked as a separate security follow-up rather than changed here (altering it has backward-compatibility implications for saved workflows).

### Any related issues, documentation, discussions?

Part of the ongoing `workflow-operator` unit-test coverage effort.

### How was this PR tested?

- `sbt "WorkflowOperator/testOnly *MySQLSourceOpDescSpec *PostgreSQLSourceOpDescSpec *AsterixDBSourceOpDescSpec"` — 15 tests, all green
- `sbt "WorkflowOperator/scalafmtCheck"`, `"WorkflowOperator/Test/scalafmtCheck"`, `"WorkflowOperator/scalafixAll --check"` — clean
- CI to confirm

### Was this PR authored or co-authored using generative AI tooling?

Generated-by: Claude Code (Opus 4.8 [1M context])

--- .../sql/asterixdb/AsterixDBSourceOpDesc.scala | 2 +- .../sql/asterixdb/AsterixDBSourceOpDescSpec.scala | 102 ++++++++++++++++++++ .../source/sql/mysql/MySQLSourceOpDescSpec.scala | 104 +++++++++++++++++++++ .../postgresql/PostgreSQLSourceOpDescSpec.scala | 99 ++++++++++++++++++++ 4 files changed, 306 insertions(+), 1 deletion(-) diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDesc.scala index 8a679fd2b4..d12d334e70 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDesc.scala @@ -130,7 +130,7 @@ class AsterixDBSourceOpDesc extends SQLSourceOpDesc { override def operatorInfo: OperatorInfo = OperatorInfo( "AsterixDB Source", - "Read data from a AsterixDB instance", + "Read data from an AsterixDB instance", 
OperatorGroupConstants.DATABASE_GROUP, inputPorts = List.empty, outputPorts = List(OutputPort()) diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDescSpec.scala new file mode 100644 index 0000000000..84f3e6a6c9 --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/asterixdb/AsterixDBSourceOpDescSpec.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.operator.source.sql.asterixdb + +import org.apache.texera.amber.core.executor.OpExecWithClassName +import org.apache.texera.amber.core.workflow.WorkflowContext.{ + DEFAULT_EXECUTION_ID, + DEFAULT_WORKFLOW_ID +} +import org.apache.texera.amber.operator.LogicalOp +import org.apache.texera.amber.operator.metadata.OperatorGroupConstants +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class AsterixDBSourceOpDescSpec extends AnyFlatSpec with Matchers { + + "AsterixDBSourceOpDesc.operatorInfo" should + "advertise the AsterixDB source in the Database Connector group with no input and one output" in { + val info = (new AsterixDBSourceOpDesc).operatorInfo + info.userFriendlyName shouldBe "AsterixDB Source" + info.operatorDescription shouldBe "Read data from an AsterixDB instance" + info.operatorGroupName shouldBe OperatorGroupConstants.DATABASE_GROUP + info.inputPorts shouldBe empty + info.outputPorts should have length 1 + } + + "AsterixDBSourceOpDesc" should "default its geo/regex/filter and connection fields" in { + val d = new AsterixDBSourceOpDesc + d.geoSearch shouldBe Some(false) + d.geoSearchByColumns shouldBe empty + d.geoSearchBoundingBox shouldBe empty + d.regexSearch shouldBe Some(false) + d.regexSearchByColumn shouldBe None + d.regex shouldBe None + d.filterCondition shouldBe Some(false) + d.filterPredicates shouldBe empty + d.host shouldBe null + d.interval shouldBe 0L + } + + "AsterixDBSourceOpDesc.sourceSchema" should "be null before a connection is configured" in { + (new AsterixDBSourceOpDesc).sourceSchema() shouldBe null + } + + "AsterixDBSourceOpDesc.getPhysicalOp" should + "wire the AsterixDB exec as a source op with no input port and one output port" in { + val d = new AsterixDBSourceOpDesc + val physical = d.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID) + physical.opExecInitInfo match { + case 
OpExecWithClassName(className, _) => + className shouldBe "org.apache.texera.amber.operator.source.sql.asterixdb.AsterixDBSourceOpExec" + case other => fail(s"expected OpExecWithClassName, got $other") + } + physical.inputPorts.keySet shouldBe empty + physical.outputPorts.keySet shouldBe d.operatorInfo.outputPorts.map(_.id).toSet + } + + "AsterixDBSourceOpDesc" should + "round-trip its config fields and omit the ignored credentials" in { + val d = new AsterixDBSourceOpDesc + d.host = "localhost" + d.database = "db" + d.table = "t" + d.username = "secret-user" + d.password = "secret-pass" + d.regex = Some("a.*") + d.geoSearchByColumns = List("lonlat") + val json = objectMapper.writeValueAsString(d) + json should include("\"operatorType\":\"AsterixDBSource\"") + // username/password are dropped via @JsonIgnoreProperties on this subclass. + json should not include "secret-user" + json should not include "secret-pass" + val restored = objectMapper.readValue(json, classOf[LogicalOp]) + restored shouldBe a[AsterixDBSourceOpDesc] + val r = restored.asInstanceOf[AsterixDBSourceOpDesc] + r.host shouldBe "localhost" + r.database shouldBe "db" + r.table shouldBe "t" + r.regex shouldBe Some("a.*") + r.geoSearchByColumns shouldBe List("lonlat") + r.username shouldBe null + r.password shouldBe null + } +} diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/mysql/MySQLSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/mysql/MySQLSourceOpDescSpec.scala new file mode 100644 index 0000000000..cde2c63680 --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/mysql/MySQLSourceOpDescSpec.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.source.sql.mysql + +import org.apache.texera.amber.core.executor.OpExecWithClassName +import org.apache.texera.amber.core.workflow.WorkflowContext.{ + DEFAULT_EXECUTION_ID, + DEFAULT_WORKFLOW_ID +} +import org.apache.texera.amber.operator.LogicalOp +import org.apache.texera.amber.operator.metadata.OperatorGroupConstants +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import scala.annotation.nowarn + +// MySQLSourceOpDesc is @deprecated (no longer executable) but retained so legacy +// workflows still deserialize; the coverage below pins that backward-compatible contract. 
+@nowarn("cat=deprecation") +class MySQLSourceOpDescSpec extends AnyFlatSpec with Matchers { + + "MySQLSourceOpDesc.operatorInfo" should + "advertise the MySQL source in the Database Connector group with no input and one output" in { + val info = (new MySQLSourceOpDesc).operatorInfo + info.userFriendlyName shouldBe "MySQL Source" + info.operatorDescription shouldBe "Read data from a MySQL instance" + info.operatorGroupName shouldBe OperatorGroupConstants.DATABASE_GROUP + info.inputPorts shouldBe empty + info.outputPorts should have length 1 + } + + "MySQLSourceOpDesc" should "default its connection and query fields" in { + val d = new MySQLSourceOpDesc + d.host shouldBe null + d.port shouldBe null + d.database shouldBe null + d.table shouldBe null + d.limit shouldBe None + d.offset shouldBe None + d.keywordSearch shouldBe Some(false) + d.progressive shouldBe Some(false) + d.interval shouldBe 0L + } + + "MySQLSourceOpDesc.sourceSchema" should "be null before a connection is configured" in { + (new MySQLSourceOpDesc).sourceSchema() shouldBe null + } + + "MySQLSourceOpDesc.getPhysicalOp" should + "wire the MySQL exec as a source op with no input port and one output port" in { + val d = new MySQLSourceOpDesc + val physical = d.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID) + physical.opExecInitInfo match { + case OpExecWithClassName(className, _) => + className shouldBe "org.apache.texera.amber.operator.source.sql.mysql.MySQLSourceOpExec" + case other => fail(s"expected OpExecWithClassName, got $other") + } + physical.inputPorts.keySet shouldBe empty + physical.outputPorts.keySet shouldBe d.operatorInfo.outputPorts.map(_.id).toSet + } + + "MySQLSourceOpDesc" should "round-trip its config fields through the polymorphic base" in { + val d = new MySQLSourceOpDesc + d.host = "localhost" + d.database = "db" + d.table = "t" + d.username = "secret-user" + d.password = "secret-pass" + d.limit = Some(5L) + val json = objectMapper.writeValueAsString(d) + json should 
include("\"operatorType\":\"MySQLSource\"") + // Unlike AsterixDBSourceOpDesc (which drops credentials via @JsonIgnoreProperties), the SQL + // base persists username/password in plaintext; pin that behavior so any future change is visible. + json should include("secret-user") + json should include("secret-pass") + val restored = objectMapper.readValue(json, classOf[LogicalOp]) + restored shouldBe a[MySQLSourceOpDesc] + val r = restored.asInstanceOf[MySQLSourceOpDesc] + r.host shouldBe "localhost" + r.database shouldBe "db" + r.table shouldBe "t" + r.username shouldBe "secret-user" + r.password shouldBe "secret-pass" + r.limit shouldBe Some(5L) + } +} diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/postgresql/PostgreSQLSourceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/postgresql/PostgreSQLSourceOpDescSpec.scala new file mode 100644 index 0000000000..df158479cf --- /dev/null +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/sql/postgresql/PostgreSQLSourceOpDescSpec.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.operator.source.sql.postgresql + +import org.apache.texera.amber.core.executor.OpExecWithClassName +import org.apache.texera.amber.core.workflow.WorkflowContext.{ + DEFAULT_EXECUTION_ID, + DEFAULT_WORKFLOW_ID +} +import org.apache.texera.amber.operator.LogicalOp +import org.apache.texera.amber.operator.metadata.OperatorGroupConstants +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class PostgreSQLSourceOpDescSpec extends AnyFlatSpec with Matchers { + + "PostgreSQLSourceOpDesc.operatorInfo" should + "advertise the PostgreSQL source in the Database Connector group with no input and one output" in { + val info = (new PostgreSQLSourceOpDesc).operatorInfo + info.userFriendlyName shouldBe "PostgreSQL Source" + info.operatorDescription shouldBe "Read data from a PostgreSQL instance" + info.operatorGroupName shouldBe OperatorGroupConstants.DATABASE_GROUP + info.inputPorts shouldBe empty + info.outputPorts should have length 1 + } + + "PostgreSQLSourceOpDesc" should "default its connection and query fields" in { + val d = new PostgreSQLSourceOpDesc + d.host shouldBe null + d.port shouldBe null + d.database shouldBe null + d.table shouldBe null + d.limit shouldBe None + d.offset shouldBe None + d.keywordSearch shouldBe Some(false) + d.progressive shouldBe Some(false) + d.interval shouldBe 0L + } + + "PostgreSQLSourceOpDesc.sourceSchema" should "be null before a connection is configured" in { + (new PostgreSQLSourceOpDesc).sourceSchema() shouldBe null + } + + "PostgreSQLSourceOpDesc.getPhysicalOp" should + "wire the PostgreSQL exec as a source op with no input port and one output port" in { + val d = new PostgreSQLSourceOpDesc + val physical = d.getPhysicalOp(DEFAULT_WORKFLOW_ID, DEFAULT_EXECUTION_ID) + physical.opExecInitInfo match { + case OpExecWithClassName(className, _) => + className shouldBe 
"org.apache.texera.amber.operator.source.sql.postgresql.PostgreSQLSourceOpExec" + case other => fail(s"expected OpExecWithClassName, got $other") + } + physical.inputPorts.keySet shouldBe empty + physical.outputPorts.keySet shouldBe d.operatorInfo.outputPorts.map(_.id).toSet + } + + "PostgreSQLSourceOpDesc" should "round-trip its config fields through the polymorphic base" in { + val d = new PostgreSQLSourceOpDesc + d.host = "localhost" + d.database = "db" + d.table = "t" + d.username = "secret-user" + d.password = "secret-pass" + d.limit = Some(5L) + val json = objectMapper.writeValueAsString(d) + json should include("\"operatorType\":\"PostgreSQLSource\"") + // Unlike AsterixDBSourceOpDesc (which drops credentials via @JsonIgnoreProperties), the SQL + // base persists username/password in plaintext; pin that behavior so any future change is visible. + json should include("secret-user") + json should include("secret-pass") + val restored = objectMapper.readValue(json, classOf[LogicalOp]) + restored shouldBe a[PostgreSQLSourceOpDesc] + val r = restored.asInstanceOf[PostgreSQLSourceOpDesc] + r.host shouldBe "localhost" + r.database shouldBe "db" + r.table shouldBe "t" + r.username shouldBe "secret-user" + r.password shouldBe "secret-pass" + r.limit shouldBe Some(5L) + } +}
