This is an automated email from the ASF dual-hosted git repository.

pingtimeout pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris-tools.git


The following commit(s) were added to refs/heads/main by this push:
     new 9391873  [feat] Add commits creator simulation (#24)
9391873 is described below

commit 93918732a374dab1fb9f9e08c1a02d750450fa9a
Author: Pierre Laporte <pie...@pingtimeout.fr>
AuthorDate: Thu Jun 12 09:28:35 2025 +0200

    [feat] Add commits creator simulation (#24)
    
    This new simulation adds a new workload that continuously updates the
    properties of every table and every view.  There is no bound to the
    maximum number of properties that can be created this way.  Each new
    property results in a new snapshot to be created for that particular
    entity.  This can quickly amount to hundreds of commits per entity.
    
    This simulation can serve as the basis for table metadata management
    workloads.
---
 benchmarks/README.md                               |   5 +-
 .../src/gatling/resources/benchmark-defaults.conf  |  15 +++
 .../benchmarks/parameters/BenchmarkConfig.scala    |   6 +
 ...ameters.scala => CreateCommitsParameters.scala} |  21 +++-
 .../benchmarks/parameters/WorkloadParameters.scala |   1 +
 .../benchmarks/simulations/CreateCommits.scala     | 138 +++++++++++++++++++++
 6 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 31be3d6..4622bc1 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -26,6 +26,7 @@ Benchmarks for the Polaris service using Gatling.
 - `org.apache.polaris.benchmarks.simulations.CreateTreeDataset`: Creates a 
test dataset with a specific structure.  It is a write-only workload designed 
to populate the system for subsequent benchmarks.
 - `org.apache.polaris.benchmarks.simulations.ReadTreeDataset`: Performs 
read-only operations to fetch namespaces, tables, and views.  Some attributes 
of the objects are also fetched.  This benchmark is intended to be used against 
a Polaris instance with a pre-existing tree dataset.  It has no side effects on 
the dataset and can be executed multiple times without any issues.
 - `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDataset`: Performs 
read and update operations against a Polaris instance populated with a test 
dataset.  It is a read/write workload that can be used to test the system's 
ability to handle concurrent read and update operations.  It is not destructive 
and does not prevent subsequent executions of `ReadTreeDataset` or 
`ReadUpdateTreeDataset`.
+- `org.apache.polaris.benchmarks.simulations.CreateCommits`: Creates table and view commits at configurable rates.  This benchmark is useful for testing the system's ability to handle table and view commits and can be used to generate a history of thousands of commits for both tables and views.
 
 ## Parameters
 
@@ -119,7 +120,9 @@ Run benchmarks with your configuration:
 ./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.ReadTreeDataset \
   -Dconfig.file=./application.conf
 
-
+# Commits creation
+./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateCommits \
+  -Dconfig.file=./application.conf
 ```
 
 A message will show the location of the Gatling report:
diff --git a/benchmarks/src/gatling/resources/benchmark-defaults.conf 
b/benchmarks/src/gatling/resources/benchmark-defaults.conf
index aaa2a99..e25937d 100644
--- a/benchmarks/src/gatling/resources/benchmark-defaults.conf
+++ b/benchmarks/src/gatling/resources/benchmark-defaults.conf
@@ -121,6 +121,21 @@ dataset.tree {
 
 # Workload configuration
 workload {
+  # Configuration for the CreateCommits simulation
+  create-commits {
+    # Number of table commits to create per second
+    # Default: 10
+    table-commits-throughput = 10
+
+    # Number of view commits to create per second
+    # Default: 5
+    view-commits-throughput = 5
+
+    # Duration of the simulation in minutes
+    # Default: 1
+    duration-in-minutes = 1
+  }
+
   # Configuration for the ReadTreeDataset simulation
   read-tree-dataset {
     # Number of table operations to perform simultaneously
diff --git 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala
 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala
index a8a0f6e..728b7c5 100644
--- 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala
+++ 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala
@@ -39,11 +39,17 @@ object BenchmarkConfig {
     )
 
     val workloadParams = {
+      val ccConfig = workload.getConfig("create-commits")
       val rtdConfig = workload.getConfig("read-tree-dataset")
       val ctdConfig = workload.getConfig("create-tree-dataset")
       val rutdConfig = workload.getConfig("read-update-tree-dataset")
 
       WorkloadParameters(
+        CreateCommitsParameters(
+          ccConfig.getInt("table-commits-throughput"),
+          ccConfig.getInt("view-commits-throughput"),
+          ccConfig.getInt("duration-in-minutes")
+        ),
         ReadTreeDatasetParameters(
           rtdConfig.getInt("table-concurrency"),
           rtdConfig.getInt("view-concurrency")
diff --git 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateCommitsParameters.scala
similarity index 55%
copy from 
benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
copy to 
benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateCommitsParameters.scala
index b6fec3c..995a0fe 100644
--- 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
+++ 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/CreateCommitsParameters.scala
@@ -19,8 +19,19 @@
 
 package org.apache.polaris.benchmarks.parameters
 
-case class WorkloadParameters(
-    readTreeDataset: ReadTreeDatasetParameters,
-    createTreeDataset: CreateTreeDatasetParameters,
-    readUpdateTreeDataset: ReadUpdateTreeDatasetParameters
-) {}
+/**
+ * Case class to hold the parameters for the CreateCommits simulation.
+ *
+ * @param tableCommitsThroughput The number of table commits to create per second.
+ * @param viewCommitsThroughput The number of view commits to create per second.
+ * @param durationInMinutes The duration of the simulation in minutes.
+ */
+case class CreateCommitsParameters(
+    tableCommitsThroughput: Int,
+    viewCommitsThroughput: Int,
+    durationInMinutes: Int
+) {
+  require(tableCommitsThroughput >= 0, "Table commits throughput cannot be 
negative")
+  require(viewCommitsThroughput >= 0, "View commits throughput cannot be 
negative")
+  require(durationInMinutes > 0, "Duration in minutes must be positive")
+}
diff --git 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
index b6fec3c..b392870 100644
--- 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
+++ 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala
@@ -20,6 +20,7 @@
 package org.apache.polaris.benchmarks.parameters
 
 case class WorkloadParameters(
+    createCommits: CreateCommitsParameters,
     readTreeDataset: ReadTreeDatasetParameters,
     createTreeDataset: CreateTreeDatasetParameters,
     readUpdateTreeDataset: ReadUpdateTreeDatasetParameters
diff --git 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateCommits.scala
 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateCommits.scala
new file mode 100644
index 0000000..cd8e7c9
--- /dev/null
+++ 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateCommits.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.polaris.benchmarks.simulations
+
+import io.gatling.core.Predef._
+import io.gatling.core.structure.ScenarioBuilder
+import io.gatling.http.Predef.http
+import org.apache.polaris.benchmarks.actions.{
+  AuthenticationActions,
+  NamespaceActions,
+  TableActions,
+  ViewActions
+}
+import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config
+import org.apache.polaris.benchmarks.parameters.{
+  ConnectionParameters,
+  DatasetParameters,
+  WorkloadParameters
+}
+import org.apache.polaris.benchmarks.util.CircularIterator
+import org.slf4j.LoggerFactory
+
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference}
+import scala.concurrent.duration._
+
+class CreateCommits extends Simulation {
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  // 
--------------------------------------------------------------------------------
+  // Load parameters
+  // 
--------------------------------------------------------------------------------
+  val cp: ConnectionParameters = config.connectionParameters
+  val dp: DatasetParameters = config.datasetParameters
+  val wp: WorkloadParameters = config.workloadParameters
+
+  // 
--------------------------------------------------------------------------------
+  // Helper values
+  // 
--------------------------------------------------------------------------------
+  private val accessToken: AtomicReference[String] = new AtomicReference()
+  private val shouldRefreshToken: AtomicBoolean = new AtomicBoolean(true)
+
+  private val authActions = AuthenticationActions(cp, accessToken)
+  private val tableActions = TableActions(dp, wp, accessToken)
+  private val viewActions = ViewActions(dp, wp, accessToken)
+
+  // 
--------------------------------------------------------------------------------
+  // Authentication related workloads:
+  // * Authenticate every minute and store the access token for later use
+  // * Wait for an OAuth token to be available
+  // * Stop the token refresh loop
+  // 
--------------------------------------------------------------------------------
+  val continuouslyRefreshOauthToken: ScenarioBuilder =
+    scenario("Authenticate every minute using the Iceberg REST API")
+      .asLongAs(_ => shouldRefreshToken.get())(
+        feed(authActions.feeder())
+          .exec(authActions.authenticateAndSaveAccessToken)
+          .pause(1.minute)
+      )
+
+  val waitForAuthentication: ScenarioBuilder =
+    scenario("Wait for the authentication token to be available")
+      .asLongAs(_ => accessToken.get() == null)(
+        pause(1.second)
+      )
+
+  val stopRefreshingToken: ScenarioBuilder =
+    scenario("Stop refreshing the authentication token")
+      .exec { session =>
+        shouldRefreshToken.set(false)
+        session
+      }
+
+  // 
--------------------------------------------------------------------------------
+  // Table write workload:
+  // * Create table commits by updating table properties
+  // * Uses the access token stored by the authentication scenario
+  // 
--------------------------------------------------------------------------------
+  val tableUpdateScenario: ScenarioBuilder =
+    scenario("Create table commits by updating properties")
+      .exec(authActions.restoreAccessTokenInSession)
+      .feed(tableActions.propertyUpdateFeeder())
+      .exec(tableActions.updateTable)
+
+  // 
--------------------------------------------------------------------------------
+  // View write workload:
+  // * Create view commits by updating view properties
+  // * Uses the access token stored by the authentication scenario
+  // 
--------------------------------------------------------------------------------
+  val viewUpdateScenario: ScenarioBuilder =
+    scenario("Create view commits by updating properties")
+      .exec(authActions.restoreAccessTokenInSession)
+      .feed(viewActions.propertyUpdateFeeder())
+      .exec(viewActions.updateView)
+
+  private val httpProtocol = http
+    .baseUrl(cp.baseUrl)
+    .acceptHeader("application/json")
+    .contentTypeHeader("application/json")
+
+  private val tableCommitsThroughput = wp.createCommits.tableCommitsThroughput
+  private val viewCommitsThroughput = wp.createCommits.viewCommitsThroughput
+  private val durationInMinutes = wp.createCommits.durationInMinutes
+  setUp(
+    
continuouslyRefreshOauthToken.inject(atOnceUsers(1)).protocols(httpProtocol),
+    waitForAuthentication
+      .inject(atOnceUsers(1))
+      .andThen(
+        tableUpdateScenario
+          .inject(
+            
constantUsersPerSec(tableCommitsThroughput).during(durationInMinutes.minutes)
+          )
+          .protocols(httpProtocol),
+        viewUpdateScenario
+          .inject(
+            
constantUsersPerSec(viewCommitsThroughput).during(durationInMinutes.minutes)
+          )
+          .protocols(httpProtocol)
+      )
+      
.andThen(stopRefreshingToken.inject(atOnceUsers(1)).protocols(httpProtocol))
+  )
+}

Reply via email to