This is an automated email from the ASF dual-hosted git repository. github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-5648-b21540d1f3433419d0badc19c3de1e6de01ee883 in repository https://gitbox.apache.org/repos/asf/texera.git
commit 01e070fd83dd7fd9a5574964880fe67d7f8f1257 Author: Matthew B. <[email protected]> AuthorDate: Sat Jun 13 22:28:17 2026 -0700 perf(auth): collapse N+1 in getComputingUnitAccess into one join (#5648) ### What changes were proposed in this PR? - Replace the two-DAO lookup in `ComputingUnitAccess.getComputingUnitAccess` (fetch the unit by `cuid`, then fetch all of the user's access rows by `uid` and filter them in memory) with a single `LEFT JOIN` of `WORKFLOW_COMPUTING_UNIT` to `COMPUTING_UNIT_USER_ACCESS` on `(cuid, uid)`, returning the owner uid and the caller's privilege in one round-trip. - Resolve the privilege from that single row: the owner gets `WRITE`, a present access row yields its privilege, and a null join (no grant) yields `NONE`. - A missing computing unit now resolves to `NONE` instead of throwing; the sole caller (`AccessControlResource`) already maps both `NONE` and a thrown exception to `403 FORBIDDEN`, so the externally observable behavior is unchanged. ### Performance comparison (previous vs current logic) Measured with a throwaway benchmark against the same embedded Postgres the unit tests use (`MockTexeraDB`), exercising the non-owner path (the only path that previously issued a second query). For each grant count the benchmark seeds a user holding that many grants, then times 2000 calls per implementation after a 200-call warmup. Microseconds per call, lower is better: | Grants held by the user | Previous (2 queries) | Current (1 join) | Speedup | | ---: | ---: | ---: | ---: | | 1 | 4382.5 us | 2395.1 us | 1.83x | | 10 | 3892.7 us | 1786.2 us | 2.18x | | 100 | 3260.0 us | 1690.4 us | 1.93x | | 1000 | 3650.8 us | 1630.0 us | 2.24x | So the rewrite is roughly 2x faster across the board. The speedup stays flat rather than growing with the grant count, which is itself informative: on this loopback setup the dominant cost is the extra database round-trip (two statements vs one), not the in-memory transfer-and-filter of the user's grant rows. Against a real database over a network, where per-statement latency is higher, halving the round-trips should matter at least as much. This addresses the fair concern that "more SQL queries do not necessarily mean slower": here the previous logic issued fewer total joins but paid for an extra round-trip, and measuring confirms the single join wins. These numbers come from a local micro-benchmark, not committed to the suite, and are absolute timings on embedded Postgres; treat the ratio as the signal, not the raw microseconds. ### Any related issues, documentation, discussions? Closes: #5645 ### How was this PR tested? - Added `ComputingUnitAccessSpec` covering the owner (`WRITE`), shared-grant (granted privilege), no-grant (`NONE`), and missing-unit (`NONE`) cases. - `sbt Auth/compile` and `sbt Auth/test`, expect a clean compile of the rewritten jOOQ query and a passing spec. - Manual, via the access-control route backed by `AccessControlResource`: request a computing unit as its owner, expect `WRITE`; as a user holding a shared grant, expect that grant's privilege; as a user with no grant, or for a non-existent `cuid`, expect `403 FORBIDDEN`. - Behavior-preservation check: confirm the only caller maps `NONE` to `403`, so the missing-unit path (previously a thrown NPE, now `NONE`) still yields `403`. ### Was this PR authored or co-authored using generative AI tooling? Generated-by: Claude Opus 4.8 (Claude Code), in compliance with the ASF Generative Tooling Guidance --- build.sbt | 2 + .../texera/auth/util/ComputingUnitAccess.scala | 46 ++++----- .../texera/auth/util/ComputingUnitAccessSpec.scala | 105 +++++++++++++++++++++ 3 files changed, 131 insertions(+), 22 deletions(-) diff --git a/build.sbt b/build.sbt index 43f97d9dff..3fac5bd097 100644 --- a/build.sbt +++ b/build.sbt @@ -66,7 +66,9 @@ lazy val DAO = (project in file("common/dao")).settings(asfLicensingSettings) lazy val Config = (project in file("common/config")).settings(asfLicensingSettings) lazy val Auth = (project in file("common/auth")) .settings(asfLicensingSettings) + .configs(Test) .dependsOn(DAO, Config) + .dependsOn(DAO % "test->test") // reuse MockTexeraDB embedded Postgres in tests lazy val ConfigService = (project in file("config-service")) .dependsOn(Auth, Config) .settings(asfLicensingSettings) diff --git a/common/auth/src/main/scala/org/apache/texera/auth/util/ComputingUnitAccess.scala b/common/auth/src/main/scala/org/apache/texera/auth/util/ComputingUnitAccess.scala index 743ed2e7e6..b999cb4890 100644 --- a/common/auth/src/main/scala/org/apache/texera/auth/util/ComputingUnitAccess.scala +++ b/common/auth/src/main/scala/org/apache/texera/auth/util/ComputingUnitAccess.scala @@ -18,15 +18,13 @@ package org.apache.texera.auth.util import org.apache.texera.dao.SqlServer -import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum -import org.apache.texera.dao.jooq.generated.tables.daos.{ - ComputingUnitUserAccessDao, - WorkflowComputingUnitDao +import org.apache.texera.dao.jooq.generated.Tables.{ + COMPUTING_UNIT_USER_ACCESS, + WORKFLOW_COMPUTING_UNIT } +import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum import org.jooq.DSLContext -import scala.jdk.CollectionConverters._ - object ComputingUnitAccess { private def context: DSLContext = SqlServer @@ -34,22 +32,26 @@ object ComputingUnitAccess { .createDSLContext() def getComputingUnitAccess(cuid: Integer, uid: Integer): PrivilegeEnum = { - val workflowComputingUnitDao = new WorkflowComputingUnitDao(context.configuration()) - val unit = workflowComputingUnitDao.fetchOneByCuid(cuid) - - if (unit.getUid.equals(uid)) { - return PrivilegeEnum.WRITE // owner has write access - } - - val computingUnitUserAccessDao = new ComputingUnitUserAccessDao(context.configuration()) - val accessOpt = computingUnitUserAccessDao - .fetchByUid(uid) - .asScala - .find(_.getCuid.equals(cuid)) - - accessOpt match { - case Some(access) => access.getPrivilege - case None => PrivilegeEnum.NONE + // At most one row: cuid is the PK of workflow_computing_unit and (cuid, uid) + // is the PK of computing_unit_user_access, so the left join cannot fan out. + val record = context + .select(WORKFLOW_COMPUTING_UNIT.UID, COMPUTING_UNIT_USER_ACCESS.PRIVILEGE) + .from(WORKFLOW_COMPUTING_UNIT) + .leftJoin(COMPUTING_UNIT_USER_ACCESS) + .on( + COMPUTING_UNIT_USER_ACCESS.CUID + .eq(WORKFLOW_COMPUTING_UNIT.CUID) + .and(COMPUTING_UNIT_USER_ACCESS.UID.eq(uid)) + ) + .where(WORKFLOW_COMPUTING_UNIT.CUID.eq(cuid)) + .fetchOne() + + if (record == null) { + PrivilegeEnum.NONE // no such unit + } else if (record.value1().equals(uid)) { + PrivilegeEnum.WRITE // owner + } else { + Option(record.value2()).getOrElse(PrivilegeEnum.NONE) } } } diff --git a/common/auth/src/test/scala/org/apache/texera/auth/util/ComputingUnitAccessSpec.scala b/common/auth/src/test/scala/org/apache/texera/auth/util/ComputingUnitAccessSpec.scala new file mode 100644 index 0000000000..6a5c519fc0 --- /dev/null +++ b/common/auth/src/test/scala/org/apache/texera/auth/util/ComputingUnitAccessSpec.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.auth.util + +import org.apache.texera.dao.MockTexeraDB +import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum +import org.apache.texera.dao.jooq.generated.tables.daos.{ + ComputingUnitUserAccessDao, + UserDao, + WorkflowComputingUnitDao +} +import org.apache.texera.dao.jooq.generated.tables.pojos.{ + ComputingUnitUserAccess, + User, + WorkflowComputingUnit +} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +// Exercises getComputingUnitAccess against an embedded Postgres seeded with a +// computing unit (cuid=100) owned by uid=1, plus a READ grant (uid=2) and a +// WRITE grant (uid=3). Covers every branch of the single-join resolution: +// missing unit, owner, explicit grant, and a user with no grant. +class ComputingUnitAccessSpec + extends AnyFlatSpec + with Matchers + with BeforeAndAfterAll + with MockTexeraDB { + + private val cuid = 100 + + private def newUser(uid: Int, name: String): User = { + val u = new User + u.setUid(uid) + u.setName(name) + u.setPassword("password") + u + } + + override def beforeAll(): Unit = { + initializeDBAndReplaceDSLContext() + val config = getDSLContext.configuration() + + val userDao = new UserDao(config) + Seq(1 -> "owner", 2 -> "reader", 3 -> "writer", 4 -> "stranger").foreach { + case (uid, name) => userDao.insert(newUser(uid, name)) + } + + val unit = new WorkflowComputingUnit + unit.setUid(1) + unit.setName("cu") + unit.setCuid(cuid) + new WorkflowComputingUnitDao(config).insert(unit) + + val accessDao = new ComputingUnitUserAccessDao(config) + Seq(2 -> PrivilegeEnum.READ, 3 -> PrivilegeEnum.WRITE).foreach { + case (uid, privilege) => + val access = new ComputingUnitUserAccess + access.setCuid(cuid) + access.setUid(uid) + access.setPrivilege(privilege) + accessDao.insert(access) + } + } + + override def afterAll(): Unit = shutdownDB() + + "getComputingUnitAccess" should "return NONE when the computing unit does not exist" in { + ComputingUnitAccess.getComputingUnitAccess(999, 1) shouldBe PrivilegeEnum.NONE + } + + it should "return WRITE for the owner regardless of any access row" in { + ComputingUnitAccess.getComputingUnitAccess(cuid, 1) shouldBe PrivilegeEnum.WRITE + } + + it should "return the granted READ privilege for a non-owner" in { + ComputingUnitAccess.getComputingUnitAccess(cuid, 2) shouldBe PrivilegeEnum.READ + } + + it should "return the granted WRITE privilege for a non-owner" in { + ComputingUnitAccess.getComputingUnitAccess(cuid, 3) shouldBe PrivilegeEnum.WRITE + } + + it should "return NONE for a user with no access row on an existing unit" in { + ComputingUnitAccess.getComputingUnitAccess(cuid, 4) shouldBe PrivilegeEnum.NONE + } +}
