This is an automated email from the ASF dual-hosted git repository. pingtimeout pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/polaris-tools.git
The following commit(s) were added to refs/heads/main by this push: new 9391873 [feat] Add commits creator simulation (#24) 9391873 is described below commit 93918732a374dab1fb9f9e08c1a02d750450fa9a Author: Pierre Laporte <pie...@pingtimeout.fr> AuthorDate: Thu Jun 12 09:28:35 2025 +0200 [feat] Add commits creator simulation (#24) This new simulation adds a new workload that continuously updates the properties of every table and every view. There is no bound to the maximum number of properties that can be created this way. Each new property results in a new snapshot to be created for that particular entity. This can quickly amount to hundreds of commits per entity. This simulation can serve as the basis for table metadata management workloads. --- benchmarks/README.md | 5 +- .../src/gatling/resources/benchmark-defaults.conf | 15 +++ .../benchmarks/parameters/BenchmarkConfig.scala | 6 + ...ameters.scala => CreateCommitsParameters.scala} | 21 +++- .../benchmarks/parameters/WorkloadParameters.scala | 1 + .../benchmarks/simulations/CreateCommits.scala | 138 +++++++++++++++++++++ 6 files changed, 180 insertions(+), 6 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 31be3d6..4622bc1 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -26,6 +26,7 @@ Benchmarks for the Polaris service using Gatling. - `org.apache.polaris.benchmarks.simulations.CreateTreeDataset`: Creates a test dataset with a specific structure. It is a write-only workload designed to populate the system for subsequent benchmarks. - `org.apache.polaris.benchmarks.simulations.ReadTreeDataset`: Performs read-only operations to fetch namespaces, tables, and views. Some attributes of the objects are also fetched. This benchmark is intended to be used against a Polaris instance with a pre-existing tree dataset. It has no side effects on the dataset and can be executed multiple times without any issues. 
- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDataset`: Performs read and update operations against a Polaris instance populated with a test dataset. It is a read/write workload that can be used to test the system's ability to handle concurrent read and update operations. It is not destructive and does not prevent subsequent executions of `ReadTreeDataset` or `ReadUpdateTreeDataset`. +- `org.apache.polaris.benchmarks.simulations.CreateCommits`: Creates table and view commits at configurable rates. This benchmark is useful for testing the system's ability to handle table and view commits and can be used to generate a history of thousands of commits for both tables and views. ## Parameters @@ -119,7 +120,9 @@ Run benchmarks with your configuration: ./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.ReadTreeDataset \ -Dconfig.file=./application.conf - +# Commits creation +./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateCommits \ + -Dconfig.file=./application.conf ``` A message will show the location of the Gatling report: diff --git a/benchmarks/src/gatling/resources/benchmark-defaults.conf b/benchmarks/src/gatling/resources/benchmark-defaults.conf index aaa2a99..e25937d 100644 --- a/benchmarks/src/gatling/resources/benchmark-defaults.conf +++ b/benchmarks/src/gatling/resources/benchmark-defaults.conf @@ -121,6 +121,21 @@ dataset.tree { # Workload configuration workload { + # Configuration for the CreateCommits simulation + create-commits { + # Number of table commits to create per second + # Default: 10 + table-commits-throughput = 10 + + # Number of view commits to create per second + # Default: 5 + view-commits-throughput = 5 + + # Duration of the simulation in minutes + # Default: 1 + duration-in-minutes = 1 + } + # Configuration for the ReadTreeDataset simulation read-tree-dataset { # Number of table operations to perform simultaneously diff --git 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala index a8a0f6e..728b7c5 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala @@ -39,11 +39,17 @@ object BenchmarkConfig { ) val workloadParams = { + val ccConfig = workload.getConfig("create-commits") val rtdConfig = workload.getConfig("read-tree-dataset") val ctdConfig = workload.getConfig("create-tree-dataset") val rutdConfig = workload.getConfig("read-update-tree-dataset") WorkloadParameters( + CreateCommitsParameters( + ccConfig.getInt("table-commits-throughput"), + ccConfig.getInt("view-commits-throughput"), + ccConfig.getInt("duration-in-minutes") + ), ReadTreeDatasetParameters( rtdConfig.getInt("table-concurrency"), rtdConfig.getInt("view-concurrency") diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateCommitsParameters.scala similarity index 55% copy from benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala copy to benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateCommitsParameters.scala index b6fec3c..995a0fe 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateCommitsParameters.scala @@ -19,8 +19,19 @@ package org.apache.polaris.benchmarks.parameters -case class WorkloadParameters( - readTreeDataset: ReadTreeDatasetParameters, - createTreeDataset: CreateTreeDatasetParameters, - readUpdateTreeDataset: ReadUpdateTreeDatasetParameters -) {} +/** + * Case class to hold the parameters for the 
CreateCommits simulation. + * + * @param tableCommitsThroughput The number of table commits to create per second. + * @param viewCommitsThroughput The number of view commits to create per second. + * @param durationInMinutes The duration of the simulation in minutes. + */ +case class CreateCommitsParameters( + tableCommitsThroughput: Int, + viewCommitsThroughput: Int, + durationInMinutes: Int +) { + require(tableCommitsThroughput >= 0, "Table commits throughput cannot be negative") + require(viewCommitsThroughput >= 0, "View commits throughput cannot be negative") + require(durationInMinutes > 0, "Duration in minutes must be positive") +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala index b6fec3c..b392870 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala @@ -20,6 +20,7 @@ package org.apache.polaris.benchmarks.parameters case class WorkloadParameters( + createCommits: CreateCommitsParameters, readTreeDataset: ReadTreeDatasetParameters, createTreeDataset: CreateTreeDatasetParameters, readUpdateTreeDataset: ReadUpdateTreeDatasetParameters diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateCommits.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateCommits.scala new file mode 100644 index 0000000..cd8e7c9 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateCommits.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.core.structure.ScenarioBuilder +import io.gatling.http.Predef.http +import org.apache.polaris.benchmarks.actions.{ + AuthenticationActions, + NamespaceActions, + TableActions, + ViewActions +} +import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config +import org.apache.polaris.benchmarks.parameters.{ + ConnectionParameters, + DatasetParameters, + WorkloadParameters +} +import org.apache.polaris.benchmarks.util.CircularIterator +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference} +import scala.concurrent.duration._ + +class CreateCommits extends Simulation { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Load parameters + // -------------------------------------------------------------------------------- + val cp: ConnectionParameters = config.connectionParameters + val dp: DatasetParameters = config.datasetParameters + val wp: WorkloadParameters = config.workloadParameters + + // -------------------------------------------------------------------------------- + // Helper values + // -------------------------------------------------------------------------------- + private val accessToken: AtomicReference[String] 
= new AtomicReference() + private val shouldRefreshToken: AtomicBoolean = new AtomicBoolean(true) + + private val authActions = AuthenticationActions(cp, accessToken) + private val tableActions = TableActions(dp, wp, accessToken) + private val viewActions = ViewActions(dp, wp, accessToken) + + // -------------------------------------------------------------------------------- + // Authentication related workloads: + // * Authenticate and store the access token for later use every minute + // * Wait for an OAuth token to be available + // * Stop the token refresh loop + // -------------------------------------------------------------------------------- + val continuouslyRefreshOauthToken: ScenarioBuilder = + scenario("Authenticate every minute using the Iceberg REST API") + .asLongAs(_ => shouldRefreshToken.get())( + feed(authActions.feeder()) + .exec(authActions.authenticateAndSaveAccessToken) + .pause(1.minute) + ) + + val waitForAuthentication: ScenarioBuilder = + scenario("Wait for the authentication token to be available") + .asLongAs(_ => accessToken.get() == null)( + pause(1.second) + ) + + val stopRefreshingToken: ScenarioBuilder = + scenario("Stop refreshing the authentication token") + .exec { session => + shouldRefreshToken.set(false) + session + } + + // -------------------------------------------------------------------------------- + // Table write workload: + // * Create table commits by updating table properties + // * Each virtual user performs a single property update + // -------------------------------------------------------------------------------- + val tableUpdateScenario: ScenarioBuilder = + scenario("Create table commits by updating properties") + .exec(authActions.restoreAccessTokenInSession) + .feed(tableActions.propertyUpdateFeeder()) + .exec(tableActions.updateTable) + + // -------------------------------------------------------------------------------- + // View write workload: + // * Create view commits by updating view properties + // * 
Each virtual user performs a single property update + // -------------------------------------------------------------------------------- + val viewUpdateScenario: ScenarioBuilder = + scenario("Create view commits by updating properties") + .exec(authActions.restoreAccessTokenInSession) + .feed(viewActions.propertyUpdateFeeder()) + .exec(viewActions.updateView) + + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + private val tableCommitsThroughput = wp.createCommits.tableCommitsThroughput + private val viewCommitsThroughput = wp.createCommits.viewCommitsThroughput + private val durationInMinutes = wp.createCommits.durationInMinutes + setUp( + continuouslyRefreshOauthToken.inject(atOnceUsers(1)).protocols(httpProtocol), + waitForAuthentication + .inject(atOnceUsers(1)) + .andThen( + tableUpdateScenario + .inject( + constantUsersPerSec(tableCommitsThroughput).during(durationInMinutes.minutes) + ) + .protocols(httpProtocol), + viewUpdateScenario + .inject( + constantUsersPerSec(viewCommitsThroughput).during(durationInMinutes.minutes) + ) + .protocols(httpProtocol) + ) + .andThen(stopRefreshingToken.inject(atOnceUsers(1)).protocols(httpProtocol)) + ) +}