This is an automated email from the ASF dual-hosted git repository. pingtimeout pushed a commit to branch benchmarks-ppc in repository https://gitbox.apache.org/repos/asf/polaris-tools.git
commit 6313c7f08c66eecbbc0caf0c810983008e7ba970 Author: Pierre Laporte <pie...@pingtimeout.fr> AuthorDate: Thu May 22 17:37:52 2025 +0200 Add Search (v1/v2) operations against Dremio in the mixed workload --- benchmarks/README.md | 13 +++- .../src/gatling/resources/benchmark-defaults.conf | 15 +++- .../benchmarks/actions/ppc/SearchActions.scala | 79 ++++++++++++++++++++++ .../benchmarks/parameters/BenchmarkConfig.scala | 6 +- .../ReadUpdateTreeDatasetParameters.scala | 13 ++-- ...set.scala => ReadUpdateSearchTreeDataset.scala} | 48 +++++++++---- .../simulations/ReadUpdateTreeDataset.scala | 5 +- 7 files changed, 154 insertions(+), 25 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index e681939..ea6723a 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -25,7 +25,7 @@ Benchmarks for the Polaris service using Gatling. - `org.apache.polaris.benchmarks.simulations.CreateTreeDataset`: Creates a test dataset with a specific structure. It is a write-only workload designed to populate the system for subsequent benchmarks. - `org.apache.polaris.benchmarks.simulations.ReadTreeDataset`: Performs read-only operations to fetch namespaces, tables, and views. Some attributes of the objects are also fetched. This benchmark is intended to be used against a Polaris instance with a pre-existing tree dataset. It has no side effects on the dataset and can be executed multiple times without any issues. -- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDataset`: Performs read and update operations against a Polaris instance populated with a test dataset. It is a read/write workload that can be used to test the system's ability to handle concurrent read and update operations. It is not destructive and does not prevent subsequent executions of `ReadTreeDataset` or `ReadUpdateTreeDataset`. 
+- `org.apache.polaris.benchmarks.simulations.ReadUpdateSearchTreeDataset`: Performs read and write operations against a Dremio PPC instance populated with a test dataset, as well as search operations against a Dremio instance populated with Wiki content. The ratio of reads to writes and the search throughput are configurable. It is not destructive and does not prevent subsequent executions of `ReadTreeDataset` or `ReadUpdateTreeDataset`. - `org.apache.polaris.benchmarks.simulations.CreateCommits`: Creates table and view commits at configurable rates. This benchmark is useful for testing the system's ability to handle table and view commits and can be used to generate a history of thousands of commits for both tables and views. - `org.apache.polaris.benchmarks.simulations.CreateTreeWiki`: Creates wiki content and tags for entities previously created by `CreateTreeDataset`. This simulation is specific to Dremio PPC and enables the creation of more elaborate data products. The wiki content and tags are generated deterministically, ensuring consistent documentation across multiple runs with the same configuration. 
@@ -76,7 +76,11 @@ Workload settings are configured under `workload`: ```hocon workload { read-update-tree-dataset { - read-write-ratio = 0.8 # Ratio of reads (0.0-1.0) + read-write-ratio = 0.8 # Ratio of reads to writes (0.0-1.0) + read-write-throughput = 100 # Number of read/write operations per second + search-throughput = 20 # Number of search operations per second + duration-in-minutes = 5 # Duration of the simulation in minutes + search-version = 2 # Version of Dremio Search to use (1 or 2) } create-tree-wiki { @@ -106,7 +110,10 @@ http { workload { read-update-tree-dataset { - read-write-ratio = 0.8 # Ratio of reads (0.0-1.0) + read-write-ratio = 0.8 # 80% reads, 20% writes + read-write-throughput = 100 # 100 read/write operations per second + search-throughput = 20 # 20 search operations per second + duration-in-minutes = 5 # Run for 5 minutes } } ``` diff --git a/benchmarks/src/gatling/resources/benchmark-defaults.conf b/benchmarks/src/gatling/resources/benchmark-defaults.conf index c8712cc..715c19e 100644 --- a/benchmarks/src/gatling/resources/benchmark-defaults.conf +++ b/benchmarks/src/gatling/resources/benchmark-defaults.conf @@ -173,13 +173,24 @@ workload { # Default: 0.5 read-write-ratio = 0.5 - # Number of operations to perform per second + # Number of read/write operations to perform per second # Default: 100 - throughput = 100 + read-write-throughput = 100 + + # Number of search operations to perform per second + # Default: 1 + search-throughput = 1 # Duration of the simulation in minutes # Default: 5 duration-in-minutes = 5 + + # Version of the Dremio Search component to use + # Allowed values: 1 or 2 + # - 1: Uses the legacy Search + # - 2: Uses Search v2 + # Default: 2 + search-version = 2 } # Configuration for the CreateTreeWiki simulation diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ppc/SearchActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ppc/SearchActions.scala new file 
mode 100644 index 0000000..bb0a2a1 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ppc/SearchActions.scala @@ -0,0 +1,79 @@ +package org.apache.polaris.benchmarks.actions.ppc + +import com.github.javafaker.Faker +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.parameters.{DatasetParameters, WorkloadParameters} +import play.api.libs.json.Json + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing search operations. This class provides methods to search for + * entities in the catalog using the Search API. + */ +case class SearchActions( + faker: Faker, + wp: WorkloadParameters, + accessToken: AtomicReference[String] +) { + private val logger = org.slf4j.LoggerFactory.getLogger(getClass) + + def folderSearchFeeder(): Feeder[Any] = Iterator.continually { + Map( + "query" -> faker.dog().name(), + "category" -> "FOLDER" + ) + } + + def tableSearchFeeder(): Feeder[Any] = Iterator.continually { + Map( + "query" -> faker.dog().name(), + "category" -> "TABLE" + ) + } + + def viewSearchFeeder(): Feeder[Any] = Iterator.continually { + Map( + "query" -> faker.dog().name(), + "category" -> "VIEW" + ) + } + + /** + * Executes a search query using the v1 API (GET request, uses legacy Search) + */ + val executeSearchV1: ChainBuilder = exec( + http("Execute Search Query V1") + .get("/apiv2/datasets/search/?filter=#{query}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + ) + + /** + * Executes a search query using the v2 API (POST request with filters, uses OpenSearch) + */ + val executeSearchV2: ChainBuilder = exec( + http("Execute Search Query V2") + .post("/api/v3/search") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "query": "#{query}", + | 
"filter": "category in [\"#{category}\"]" + |}""".stripMargin + ) + ) + .check(status.is(200)) + ) + + val executeSearch: ChainBuilder = if (wp.readUpdateTreeDataset.searchVersion == 1) { + executeSearchV1 + } else { + executeSearchV2 + } +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala index 228383e..304ec2d 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala @@ -62,8 +62,10 @@ object BenchmarkConfig { ), ReadUpdateTreeDatasetParameters( rutdConfig.getDouble("read-write-ratio"), - rutdConfig.getInt("throughput"), - rutdConfig.getInt("duration-in-minutes") + rutdConfig.getInt("read-write-throughput"), + rutdConfig.getInt("search-throughput"), + rutdConfig.getInt("duration-in-minutes"), + rutdConfig.getInt("search-version") ), CreateTreeWikiParameters( ctwConfig.getInt("namespace-concurrency"), diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala index 40d4c7d..b93632d 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ReadUpdateTreeDatasetParameters.scala @@ -23,20 +23,25 @@ package org.apache.polaris.benchmarks.parameters * Case class to hold the parameters for the ReadUpdateTreeDataset simulation. * * @param readWriteRatio The ratio of read operations to write operations (0.0-1.0). - * @param throughput The number of operations to perform per second. + * @param readWriteThroughput The number of read and write operations to perform per second. 
+ * @param searchThroughput The number of search operations to perform per second. * @param durationInMinutes The duration of the simulation in minutes. */ case class ReadUpdateTreeDatasetParameters( readWriteRatio: Double, - throughput: Int, - durationInMinutes: Int + readWriteThroughput: Int, + searchThroughput: Int, + durationInMinutes: Int, + searchVersion: Int ) { require( readWriteRatio >= 0.0 && readWriteRatio <= 1.0, "Read/write ratio must be between 0.0 and 1.0 inclusive" ) - require(throughput >= 0, "Throughput cannot be negative") + require(readWriteThroughput >= 0, "Read/write throughput cannot be negative") + require(searchThroughput >= 0, "Search throughput cannot be negative") require(durationInMinutes > 0, "Duration in minutes must be positive") + require(searchVersion == 1 || searchVersion == 2, "Search version must be either 1 or 2") val gatlingReadRatio: Double = readWriteRatio * 100 val gatlingWriteRatio: Double = (1 - readWriteRatio) * 100 diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateSearchTreeDataset.scala similarity index 79% copy from benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala copy to benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateSearchTreeDataset.scala index 13b373e..9f3d6e6 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateSearchTreeDataset.scala @@ -19,10 +19,12 @@ package org.apache.polaris.benchmarks.simulations +import com.github.javafaker.Faker import io.gatling.core.Predef._ import io.gatling.core.structure.ScenarioBuilder import io.gatling.http.Predef._ import org.apache.polaris.benchmarks.actions._ +import 
org.apache.polaris.benchmarks.actions.ppc.SearchActions import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config import org.apache.polaris.benchmarks.parameters.{ ConnectionParameters, @@ -33,15 +35,15 @@ import org.apache.polaris.benchmarks.util.CircularIterator import org.slf4j.LoggerFactory import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference} +import java.util.{Locale, Random} import scala.concurrent.duration._ /** - * This simulation tests read and update operations on an existing dataset. - * - * The ratio of read operations to write operations is controlled by the readWriteRatio parameter in - * the ReadUpdateTreeDatasetParameters. + * This simulation tests read and update operations on an existing dataset, by sending queries to + * Dremio PPC directly. It also tests search operations by sending queries to Dremio Search using + * terms generated from the wiki content. */ -class ReadUpdateTreeDataset extends Simulation { +class ReadUpdateSearchTreeDataset extends Simulation { private val logger = LoggerFactory.getLogger(getClass) // -------------------------------------------------------------------------------- @@ -57,10 +59,12 @@ class ReadUpdateTreeDataset extends Simulation { private val accessToken: AtomicReference[String] = new AtomicReference() private val shouldRefreshToken: AtomicBoolean = new AtomicBoolean(true) + private val faker = new Faker(Locale.ENGLISH, new Random(0)) private val authActions = AuthenticationActions(cp, accessToken) private val nsActions = NamespaceActions(dp, wp, accessToken) private val tblActions = TableActions(dp, wp, accessToken) private val viewActions = ViewActions(dp, wp, accessToken) + private val searchActions = SearchActions(faker, wp, accessToken) private val nsListFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder) private val nsExistsFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder) @@ -77,6 +81,11 @@ class ReadUpdateTreeDataset extends Simulation { private 
val viewFetchFeeder = new CircularIterator(viewActions.viewFetchFeeder) private val viewUpdateFeeder = viewActions.propertyUpdateFeeder() + // Search feeders + private val folderSearchFeeder = searchActions.folderSearchFeeder() + private val tableSearchFeeder = searchActions.tableSearchFeeder() + private val viewSearchFeeder = searchActions.viewSearchFeeder() + // -------------------------------------------------------------------------------- // Authentication related workloads: // * Authenticate and store the access token for later use every minute @@ -105,7 +114,9 @@ class ReadUpdateTreeDataset extends Simulation { } // -------------------------------------------------------------------------------- - // Workload: Randomly read and write entities + // Workloads: + // * Randomly read and write entities using the configured ratio + // * Search entities using a 33%/33%/34% ratio between namespaces, tables, and views // -------------------------------------------------------------------------------- val readWriteScenario: ScenarioBuilder = scenario("Read and write entities using the Iceberg REST API") @@ -133,6 +144,17 @@ class ReadUpdateTreeDataset extends Simulation { ) ) + val searchScenario: ScenarioBuilder = + scenario("Search entities") + .group("Search")( + exec(authActions.restoreAccessTokenInSession) + .randomSwitch( + 33.0 -> exec(feed(folderSearchFeeder).exec(searchActions.executeSearch)), + 33.0 -> exec(feed(tableSearchFeeder).exec(searchActions.executeSearch)), + 34.0 -> exec(feed(viewSearchFeeder).exec(searchActions.executeSearch)) + ) + ) + // -------------------------------------------------------------------------------- // Build up the HTTP protocol configuration and set up the simulation // -------------------------------------------------------------------------------- @@ -147,8 +169,9 @@ class ReadUpdateTreeDataset extends Simulation { .disableCaching // Get the configured throughput and duration - private val throughput = 
wp.readUpdateTreeDataset.throughput - private val durationInMinutes = wp.readUpdateTreeDataset.durationInMinutes + private val rwThroughput = wp.readUpdateTreeDataset.readWriteThroughput + private val searchThroughput = wp.readUpdateTreeDataset.searchThroughput + private val duration = wp.readUpdateTreeDataset.durationInMinutes setUp( continuouslyRefreshOauthToken.inject(atOnceUsers(1)).protocols(dremioHttpProtocol), @@ -156,10 +179,11 @@ class ReadUpdateTreeDataset extends Simulation { .inject(atOnceUsers(1)) .andThen( readWriteScenario - .inject( - constantUsersPerSec(throughput).during(durationInMinutes.minutes).randomized - ) - .protocols(polarisHttpProtocol) + .inject(constantUsersPerSec(rwThroughput).during(duration.minutes)) + .protocols(polarisHttpProtocol), + searchScenario + .inject(constantUsersPerSec(searchThroughput).during(duration.minutes)) + .protocols(dremioHttpProtocol) ) .andThen(stopRefreshingToken.inject(atOnceUsers(1)).protocols(dremioHttpProtocol)) ) diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala index 13b373e..d1963e3 100644 --- a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala @@ -147,7 +147,8 @@ class ReadUpdateTreeDataset extends Simulation { .disableCaching // Get the configured throughput and duration - private val throughput = wp.readUpdateTreeDataset.throughput + private val readWriteThroughput = wp.readUpdateTreeDataset.readWriteThroughput + private val searchThroughput = wp.readUpdateTreeDataset.searchThroughput private val durationInMinutes = wp.readUpdateTreeDataset.durationInMinutes setUp( @@ -157,7 +158,7 @@ class ReadUpdateTreeDataset extends Simulation { .andThen( readWriteScenario .inject( - 
constantUsersPerSec(throughput).during(durationInMinutes.minutes).randomized + constantUsersPerSec(readWriteThroughput).during(durationInMinutes.minutes).randomized ) .protocols(polarisHttpProtocol) )