This is an automated email from the ASF dual-hosted git repository. dimas pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/polaris-tools.git
The following commit(s) were added to refs/heads/main by this push: new 47e98f2 Remove sequential simulations and add continuous auth to Polaris benchmarks (#6) 47e98f2 is described below commit 47e98f28f787a8cf237ff30642edadbb558c1857 Author: Pierre Laporte <pie...@pingtimeout.fr> AuthorDate: Mon Apr 21 22:32:33 2025 +0200 Remove sequential simulations and add continuous auth to Polaris benchmarks (#6) * Remove sequential simulations and add continuous auth * Remove sequential simulations now that the concurrent simulation throughput is configurable (and can be set to 1 until SQL implementation is able to keep up with throughput) * Configure every workload using the configuration file * Change authentication logic so that OAuth token is refreshed every minute. This makes it possible to run a benchmark for longer than the default OAuth validity period (1h). It is useful for use cases like creating very large data sets or running longevity tests. * Code review: use block instead of argument for Gatling actions --- benchmarks/README.md | 41 +++++++++----- .../src/gatling/resources/benchmark-defaults.conf | 34 ++++++++++++ .../polaris/benchmarks/NAryTreeBuilder.scala | 3 +- .../benchmarks/parameters/BenchmarkConfig.scala | 32 ++++++++--- .../CreateTreeDatasetParameters.scala} | 33 +++++------ .../ReadTreeDatasetParameters.scala} | 33 +++++------ ...scala => ReadUpdateTreeDatasetParameters.scala} | 39 ++++--------- .../benchmarks/parameters/WorkloadParameters.scala | 5 +- .../benchmarks/simulations/CreateTreeDataset.scala | 63 +++++++++++++++++++-- .../simulations/CreateTreeDatasetConcurrent.scala | 57 ------------------- .../simulations/CreateTreeDatasetSequential.scala | 50 ----------------- .../benchmarks/simulations/ReadTreeDataset.scala | 54 +++++++++++++----- .../simulations/ReadUpdateTreeDataset.scala | 64 +++++++++++++++++++--- 13 files changed, 283 insertions(+), 225 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index f313084..c80cc13 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -6,9 +6,9 @@ to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -25,21 +25,28 @@ Benchmarks for the Polaris service using Gatling. ### Dataset Creation Benchmark -The CreateTreeDataset benchmark creates a test dataset with a specific structure. It exists in two variants: +The CreateTreeDataset benchmark creates a test dataset with a specific structure: -- `org.apache.polaris.benchmarks.simulations.CreateTreeDatasetSequential`: Creates entities one at a time -- `org.apache.polaris.benchmarks.simulations.CreateTreeDatasetConcurrent`: Creates up to 50 entities simultaneously +- `org.apache.polaris.benchmarks.simulations.CreateTreeDataset`: Creates up to 50 entities simultaneously -These are write-only workloads designed to populate the system for subsequent benchmarks. +This is a write-only workload designed to populate the system for subsequent benchmarks. ### Read/Update Benchmark -The ReadUpdateTreeDataset benchmark tests read and update operations on an existing dataset. It exists in two variants: +The ReadUpdateTreeDataset benchmark tests read and update operations on an existing dataset: + +- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDataset`: Performs up to 20 read/update operations simultaneously + +This benchmark can only be run after using CreateTreeDataset to populate the system. + +### Read-Only Benchmark + +The ReadTreeDataset benchmark is a 100% read workload that fetches a tree dataset in Polaris: -- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetSequential`: Performs read/update operations one at a time -- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetConcurrent`: Performs up to 20 read/update operations simultaneously +- `org.apache.polaris.benchmarks.simulations.ReadTreeDataset`: Performs read-only operations to verify namespaces, tables, and views + +This benchmark is intended to be used against a Polaris instance with a pre-existing tree dataset. It has no side effects on the dataset and can be executed multiple times without any issues. -These benchmarks can only be run after using CreateTreeDataset to populate the system. ## Parameters @@ -117,13 +124,19 @@ workload { Run benchmarks with your configuration: ```bash -# Sequential dataset creation -./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateTreeDatasetSequential \ +# Dataset creation +./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateTreeDataset \ + -Dconfig.file=./application.conf + +# Read/Update operations +./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDataset \ -Dconfig.file=./application.conf -# Concurrent dataset creation -./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateTreeDatasetConcurrent \ +# Read-only operations +./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.ReadTreeDataset \ -Dconfig.file=./application.conf + + ``` A message will show the location of the Gatling report: diff --git a/benchmarks/src/gatling/resources/benchmark-defaults.conf b/benchmarks/src/gatling/resources/benchmark-defaults.conf index 8aae51c..21b0451 100644 --- a/benchmarks/src/gatling/resources/benchmark-defaults.conf +++ b/benchmarks/src/gatling/resources/benchmark-defaults.conf @@ -140,4 +140,38 @@ workload { # Number of property updates to perform per individual view # Default: 10 updates-per-view = 10 + + + # Configuration for the ReadTreeDataset simulation + read-tree-dataset { + # Number of table operations to perform per second + # Default: 20 + table-throughput = 20 + + # Number of view operations to perform per second + # Default: 10 + view-throughput = 10 + } + + # Configuration for the CreateTreeDataset simulation + create-tree-dataset { + # Number of table operations to perform per second + # Default: 20 + table-throughput = 20 + + # Number of view operations to perform per second + # Default: 10 + view-throughput = 10 + } + + # Configuration for the ReadUpdateTreeDataset simulation + read-update-tree-dataset { + # Number of operations to perform per second + # Default: 100 + throughput = 100 + + # Duration of the simulation in minutes + # Default: 5 + duration-in-minutes = 5 + } } diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala index bf9ed54..326ef6d 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala @@ -67,7 +67,7 @@ case class NAryTreeBuilder(nsWidth: Int, nsDepth: Int) { * * @return The total number of nodes in the tree. */ - val numberOfNodes: Int = { + val numberOfNodes: Int = // The sum of nodes from level 0 to level d-1 is (n^(d+1) - 1) / (n - 1) if n > 1 // Else, the sum of nodes from level 0 to level d-1 is d if (nsWidth == 1) { @@ -75,7 +75,6 @@ case class NAryTreeBuilder(nsWidth: Int, nsDepth: Int) { } else { ((math.pow(nsWidth, nsDepth) - 1) / (nsWidth - 1)).toInt } - } /** * Returns a range of ordinals for the nodes on the last level of a complete n-ary tree. diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala index 3799965..755a34d 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala @@ -38,13 +38,31 @@ object BenchmarkConfig { http.getString("base-url") ) - val workloadParams = WorkloadParameters( - workload.getDouble("read-write-ratio"), - workload.getInt("updates-per-namespace"), - workload.getInt("updates-per-table"), - workload.getInt("updates-per-view"), - workload.getLong("seed") - ) + val workloadParams = { + val rtdConfig = workload.getConfig("read-tree-dataset") + val ctdConfig = workload.getConfig("create-tree-dataset") + val rutdConfig = workload.getConfig("read-update-tree-dataset") + + WorkloadParameters( + workload.getDouble("read-write-ratio"), + workload.getInt("updates-per-namespace"), + workload.getInt("updates-per-table"), + workload.getInt("updates-per-view"), + workload.getLong("seed"), + ReadTreeDatasetParameters( + rtdConfig.getInt("table-throughput"), + rtdConfig.getInt("view-throughput") + ), + CreateTreeDatasetParameters( + ctdConfig.getInt("table-throughput"), + ctdConfig.getInt("view-throughput") + ), + ReadUpdateTreeDatasetParameters( + rutdConfig.getInt("throughput"), + rutdConfig.getInt("duration-in-minutes") + ) + ) + } val datasetParams = DatasetParameters( dataset.getInt("num-catalogs"), diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateTreeDatasetParameters.scala similarity index 50% rename from benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala rename to benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateTreeDatasetParameters.scala index 4c9e80e..68a72ad 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateTreeDatasetParameters.scala @@ -17,25 +17,18 @@ * under the License. */ -package org.apache.polaris.benchmarks.simulations +package org.apache.polaris.benchmarks.parameters -import io.gatling.core.Predef._ -import io.gatling.http.Predef._ - -import scala.concurrent.duration.DurationInt - -class ReadUpdateTreeDatasetSequential extends ReadUpdateTreeDataset { - // -------------------------------------------------------------------------------- - // Build up the HTTP protocol configuration and set up the simulation - // -------------------------------------------------------------------------------- - private val httpProtocol = http - .baseUrl(cp.baseUrl) - .acceptHeader("application/json") - .contentTypeHeader("application/json") - - setUp( - authenticate - .inject(atOnceUsers(1)) - .andThen(readWriteScenario.inject(constantUsersPerSec(1).during(5.minutes))) - ).protocols(httpProtocol) +/** + * Case class to hold the parameters for the CreateTreeDataset simulation. + * + * @param tableThroughput The number of table operations to perform per second. + * @param viewThroughput The number of view operations to perform per second. + */ +case class CreateTreeDatasetParameters( + tableThroughput: Int, + viewThroughput: Int +) { + require(tableThroughput >= 0, "Table throughput cannot be negative") + require(viewThroughput >= 0, "View throughput cannot be negative") } diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadTreeDatasetParameters.scala similarity index 50% rename from benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala rename to benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadTreeDatasetParameters.scala index 35e219a..82c1aa0 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadTreeDatasetParameters.scala @@ -17,25 +17,18 @@ * under the License. */ -package org.apache.polaris.benchmarks.simulations +package org.apache.polaris.benchmarks.parameters -import io.gatling.core.Predef._ -import io.gatling.http.Predef._ - -import scala.concurrent.duration.DurationInt - -class ReadUpdateTreeDatasetConcurrent extends ReadUpdateTreeDataset { - // -------------------------------------------------------------------------------- - // Build up the HTTP protocol configuration and set up the simulation - // -------------------------------------------------------------------------------- - private val httpProtocol = http - .baseUrl(cp.baseUrl) - .acceptHeader("application/json") - .contentTypeHeader("application/json") - - setUp( - authenticate - .inject(atOnceUsers(1)) - .andThen(readWriteScenario.inject(constantUsersPerSec(100).during(5.minutes).randomized)) - ).protocols(httpProtocol) +/** + * Case class to hold the parameters for the ReadTreeDataset simulation. + * + * @param tableThroughput The number of table operations to perform per second. + * @param viewThroughput The number of view operations to perform per second. + */ +case class ReadTreeDatasetParameters( + tableThroughput: Int, + viewThroughput: Int +) { + require(tableThroughput >= 0, "Table throughput cannot be negative") + require(viewThroughput >= 0, "View throughput cannot be negative") } diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala similarity index 56% copy from benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala copy to benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala index 6831f44..ada6cac 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala @@ -19,33 +19,16 @@ package org.apache.polaris.benchmarks.parameters -case class WorkloadParameters( - readWriteRatio: Double, - updatesPerNamespace: Int, - updatesPerTable: Int, - updatesPerView: Int, - seed: Long +/** + * Case class to hold the parameters for the ReadUpdateTreeDataset simulation. + * + * @param throughput The number of operations to perform per second. + * @param durationInMinutes The duration of the simulation in minutes. + */ +case class ReadUpdateTreeDatasetParameters( + throughput: Int, + durationInMinutes: Int ) { - require( - readWriteRatio >= 0.0 && readWriteRatio <= 1.0, - "Read/write ratio must be between 0.0 and 1.0 inclusive" - ) - - require( - updatesPerNamespace >= 0, - "Updates per namespace must be non-negative" - ) - - require( - updatesPerTable >= 0, - "Updates per table must be non-negative" - ) - - require( - updatesPerView >= 0, - "Updates per view must be non-negative" - ) - - val gatlingReadRatio: Double = readWriteRatio * 100 - val gatlingWriteRatio: Double = (1 - readWriteRatio) * 100 + require(throughput >= 0, "Throughput cannot be negative") + require(durationInMinutes > 0, "Duration in minutes must be positive") } diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala index 6831f44..daad6bc 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala @@ -24,7 +24,10 @@ case class WorkloadParameters( updatesPerNamespace: Int, updatesPerTable: Int, updatesPerView: Int, - seed: Long + seed: Long, + readTreeDataset: ReadTreeDatasetParameters, + createTreeDataset: CreateTreeDatasetParameters, + readUpdateTreeDataset: ReadUpdateTreeDatasetParameters ) { require( readWriteRatio >= 0.0 && readWriteRatio <= 1.0, diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala index cd01faf..a4f3610 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala @@ -21,6 +21,7 @@ package org.apache.polaris.benchmarks.simulations import io.gatling.core.Predef._ import io.gatling.core.structure.ScenarioBuilder +import io.gatling.http.Predef._ import org.apache.polaris.benchmarks.actions._ import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config import org.apache.polaris.benchmarks.parameters.{ @@ -30,7 +31,8 @@ import org.apache.polaris.benchmarks.parameters.{ } import org.slf4j.LoggerFactory -import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReference} +import scala.concurrent.duration._ /** * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended @@ -51,6 +53,7 @@ class CreateTreeDataset extends Simulation { // -------------------------------------------------------------------------------- private val numNamespaces: Int = dp.nAryTree.numberOfNodes private val accessToken: AtomicReference[String] = new AtomicReference() + private val shouldRefreshToken: AtomicBoolean = new AtomicBoolean(true) private val authenticationActions = AuthenticationActions(cp, accessToken, 5, Set(500)) private val catalogActions = CatalogActions(dp, accessToken, 0, Set()) @@ -64,11 +67,31 @@ class CreateTreeDataset extends Simulation { private val createdViews = new AtomicInteger() // -------------------------------------------------------------------------------- - // Workload: Authenticate and store the access token for later use + // Authentication related workloads: + // * Authenticate and store the access token for later use every minute + // * Wait for an OAuth token to be available + // * Stop the token refresh loop // -------------------------------------------------------------------------------- - val authenticate: ScenarioBuilder = scenario("Authenticate using the OAuth2 REST API endpoint") - .feed(authenticationActions.feeder()) - .exec(authenticationActions.authenticateAndSaveAccessToken) + val continuouslyRefreshOauthToken: ScenarioBuilder = + scenario("Authenticate every minute using the Iceberg REST API") + .asLongAs(_ => shouldRefreshToken.get()) { + feed(authenticationActions.feeder()) + .exec(authenticationActions.authenticateAndSaveAccessToken) + .pause(1.minute) + } + + val waitForAuthentication: ScenarioBuilder = + scenario("Wait for the authentication token to be available") + .asLongAs(_ => accessToken.get() == null) { + pause(1.second) + } + + val stopRefreshingToken: ScenarioBuilder = + scenario("Stop refreshing the authentication token") + .exec { session => + shouldRefreshToken.set(false) + session + } // -------------------------------------------------------------------------------- // Workload: Create catalogs @@ -118,4 +141,34 @@ class CreateTreeDataset extends Simulation { feed(viewActions.viewCreationFeeder()) .exec(viewActions.createView) ) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + // Get the configured throughput for tables and views + private val tableThroughput = wp.createTreeDataset.tableThroughput + private val viewThroughput = wp.createTreeDataset.viewThroughput + + setUp( + continuouslyRefreshOauthToken.inject(atOnceUsers(1)).protocols(httpProtocol), + waitForAuthentication + .inject(atOnceUsers(1)) + .andThen(createCatalogs.inject(atOnceUsers(1)).protocols(httpProtocol)) + .andThen( + createNamespaces + .inject( + constantUsersPerSec(1).during(1.seconds), + constantUsersPerSec(dp.nsWidth - 1).during(dp.nsDepth.seconds) + ) + .protocols(httpProtocol) + ) + .andThen(createTables.inject(atOnceUsers(tableThroughput)).protocols(httpProtocol)) + .andThen(createViews.inject(atOnceUsers(viewThroughput)).protocols(httpProtocol)) + .andThen(stopRefreshingToken.inject(atOnceUsers(1)).protocols(httpProtocol)) + ) } diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala deleted file mode 100644 index 6381bd7..0000000 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.polaris.benchmarks.simulations - -import io.gatling.core.Predef._ -import io.gatling.http.Predef._ -import org.slf4j.LoggerFactory - -import scala.concurrent.duration._ - -/** - * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended - * to be used against an empty Polaris instance. It is a concurrent version of CreateTreeDataset, - * i.e. up to 50 requests are sent simultaneously. - */ -class CreateTreeDatasetConcurrent extends CreateTreeDataset { - private val logger = LoggerFactory.getLogger(getClass) - - // -------------------------------------------------------------------------------- - // Build up the HTTP protocol configuration and set up the simulation - // -------------------------------------------------------------------------------- - private val httpProtocol = http - .baseUrl(cp.baseUrl) - .acceptHeader("application/json") - .contentTypeHeader("application/json") - - setUp( - authenticate - .inject(atOnceUsers(1)) - .andThen(createCatalogs.inject(atOnceUsers(50))) - .andThen( - createNamespaces.inject( - constantUsersPerSec(1).during(1.seconds), - constantUsersPerSec(dp.nsWidth - 1).during(dp.nsDepth.seconds) - ) - ) - .andThen(createTables.inject(atOnceUsers(20))) - .andThen(createViews.inject(atOnceUsers(20))) - ).protocols(httpProtocol) -} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala deleted file mode 100644 index cc46c51..0000000 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.polaris.benchmarks.simulations - -import io.gatling.core.Predef._ -import io.gatling.http.Predef._ -import org.slf4j.LoggerFactory - -/** - * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended - * to be used against an empty Polaris instance. It is a sequential version of CreateTreeDataset, - * i.e. only one request is sent at a time. - */ -class CreateTreeDatasetSequential extends CreateTreeDataset { - private val logger = LoggerFactory.getLogger(getClass) - - // -------------------------------------------------------------------------------- - // Build up the HTTP protocol configuration and set up the simulation - // -------------------------------------------------------------------------------- - private val httpProtocol = http - .baseUrl(cp.baseUrl) - .acceptHeader("application/json") - .contentTypeHeader("application/json") - - setUp( - authenticate - .inject(atOnceUsers(1)) - .andThen(createCatalogs.inject(atOnceUsers(1))) - .andThen(createNamespaces.inject(atOnceUsers(1))) - .andThen(createTables.inject(atOnceUsers(1))) - .andThen(createViews.inject(atOnceUsers(1))) - ).protocols(httpProtocol) -} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala index b35c409..942105f 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala @@ -20,13 +20,15 @@ package org.apache.polaris.benchmarks.simulations import io.gatling.core.Predef._ +import io.gatling.core.structure.ScenarioBuilder import io.gatling.http.Predef._ import org.apache.polaris.benchmarks.actions._ import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config import org.apache.polaris.benchmarks.parameters.WorkloadParameters import org.slf4j.LoggerFactory -import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReference} +import scala.concurrent.duration.DurationInt /** * This simulation is a 100% read workload that fetches a tree dataset in Polaris. It is intended to @@ -48,6 +50,7 @@ class ReadTreeDataset extends Simulation { // -------------------------------------------------------------------------------- private val numNamespaces: Int = dp.nAryTree.numberOfNodes private val accessToken: AtomicReference[String] = new AtomicReference() + private val shouldRefreshToken: AtomicBoolean = new AtomicBoolean(true) private val authenticationActions = AuthenticationActions(cp, accessToken) private val catalogActions = CatalogActions(dp, accessToken) @@ -61,13 +64,31 @@ class ReadTreeDataset extends Simulation { private val verifiedViews = new AtomicInteger() // -------------------------------------------------------------------------------- - // Workload: Authenticate and store the access token for later use - // -------------------------------------------------------------------------------- - private val authenticate = scenario("Authenticate using the OAuth2 REST API endpoint") - .feed(authenticationActions.feeder()) - .tryMax(5) { - exec(authenticationActions.authenticateAndSaveAccessToken) - } + // Authentication related workloads: + // * Authenticate and store the access token for later use every minute + // * Wait for an OAuth token to be available + // * Stop the token refresh loop + // -------------------------------------------------------------------------------- + val continuouslyRefreshOauthToken: ScenarioBuilder = + scenario("Authenticate every minute using the Iceberg REST API") + .asLongAs(_ => shouldRefreshToken.get()) { + feed(authenticationActions.feeder()) + .exec(authenticationActions.authenticateAndSaveAccessToken) + .pause(1.minute) + } + + val waitForAuthentication: ScenarioBuilder = + scenario("Wait for the authentication token to be available") + .asLongAs(_ => accessToken.get() == null) { + pause(1.second) + } + + val stopRefreshingToken: ScenarioBuilder = + scenario("Stop refreshing the authentication token") + .exec { session => + shouldRefreshToken.set(false) + session + } // -------------------------------------------------------------------------------- // Workload: Verify each catalog @@ -131,13 +152,18 @@ class ReadTreeDataset extends Simulation { .acceptHeader("application/json") .contentTypeHeader("application/json") + // Get the configured throughput for tables and views + private val tableThroughput = wp.readTreeDataset.tableThroughput + private val viewThroughput = wp.readTreeDataset.viewThroughput + setUp( - authenticate + continuouslyRefreshOauthToken.inject(atOnceUsers(1)).protocols(httpProtocol), + waitForAuthentication .inject(atOnceUsers(1)) - .andThen(verifyCatalogs.inject(atOnceUsers(1))) - .andThen(verifyNamespaces.inject(atOnceUsers(dp.nsDepth))) - .andThen(verifyTables.inject(atOnceUsers(50))) - .andThen(verifyViews.inject(atOnceUsers(50))) + .andThen(verifyCatalogs.inject(atOnceUsers(1)).protocols(httpProtocol)) + .andThen(verifyNamespaces.inject(atOnceUsers(dp.nsDepth)).protocols(httpProtocol)) + .andThen(verifyTables.inject(atOnceUsers(tableThroughput)).protocols(httpProtocol)) + .andThen(verifyViews.inject(atOnceUsers(viewThroughput)).protocols(httpProtocol)) + .andThen(stopRefreshingToken.inject(atOnceUsers(1)).protocols(httpProtocol)) ) - .protocols(httpProtocol) } diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala index c1dbe37..080152c 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala @@ -21,6 +21,7 @@ package org.apache.polaris.benchmarks.simulations import io.gatling.core.Predef._ import io.gatling.core.structure.ScenarioBuilder +import io.gatling.http.Predef._ import org.apache.polaris.benchmarks.actions._ import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config import org.apache.polaris.benchmarks.parameters.{ @@ -31,8 +32,12 @@ import org.apache.polaris.benchmarks.parameters.{ import org.apache.polaris.benchmarks.util.CircularIterator import org.slf4j.LoggerFactory -import java.util.concurrent.atomic.AtomicReference +import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference} +import scala.concurrent.duration._ +/** + * This simulation tests read and update operations on an existing dataset. + */ class ReadUpdateTreeDataset extends Simulation { private val logger = LoggerFactory.getLogger(getClass) @@ -48,6 +53,7 @@ class ReadUpdateTreeDataset extends Simulation { // -------------------------------------------------------------------------------- private val numNamespaces: Int = dp.nAryTree.numberOfNodes private val accessToken: AtomicReference[String] = new AtomicReference() + private val shouldRefreshToken: AtomicBoolean = new AtomicBoolean(true) private val authActions = AuthenticationActions(cp, accessToken) private val catActions = CatalogActions(dp, accessToken) @@ -56,13 +62,31 @@ class ReadUpdateTreeDataset extends Simulation { private val viewActions = ViewActions(dp, wp, accessToken) // -------------------------------------------------------------------------------- - // Workload: Authenticate and store the access token for later use + // Authentication related workloads: + // * Authenticate and store the access token for later use every minute + // * Wait for an OAuth token to be available + // * Stop the token refresh loop // -------------------------------------------------------------------------------- - val authenticate: ScenarioBuilder = scenario("Authenticate using the OAuth2 REST API endpoint") - .feed(authActions.feeder()) - .tryMax(5) { - exec(authActions.authenticateAndSaveAccessToken) - } + val continuouslyRefreshOauthToken: ScenarioBuilder = + scenario("Authenticate every minute using the Iceberg REST API") + .asLongAs(_ => shouldRefreshToken.get()) { + feed(authActions.feeder()) + .exec(authActions.authenticateAndSaveAccessToken) + .pause(1.minute) + } + + val waitForAuthentication: ScenarioBuilder = + scenario("Wait for the authentication token to be available") + .asLongAs(_ => accessToken.get() == null) { + pause(1.second) + } + + val stopRefreshingToken: ScenarioBuilder = + scenario("Stop refreshing the authentication token") + .exec { session => + shouldRefreshToken.set(false) + session + } private val nsListFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder) private val nsExistsFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder) @@ -107,4 +131,30 @@ class ReadUpdateTreeDataset extends Simulation { ) ) ) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + // Get the configured throughput and duration + private val throughput = wp.readUpdateTreeDataset.throughput + private val durationInMinutes = wp.readUpdateTreeDataset.durationInMinutes + + setUp( + continuouslyRefreshOauthToken.inject(atOnceUsers(1)).protocols(httpProtocol), + waitForAuthentication + .inject(atOnceUsers(1)) + .andThen( + readWriteScenario + .inject( + constantUsersPerSec(throughput).during(durationInMinutes.minutes).randomized + ) + .protocols(httpProtocol) + ) + .andThen(stopRefreshingToken.inject(atOnceUsers(1)).protocols(httpProtocol)) + ) }