This is an automated email from the ASF dual-hosted git repository. jbonofre pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/polaris-tools.git
The following commit(s) were added to refs/heads/main by this push: new 1859a16 Add Polaris benchmarks (#2) 1859a16 is described below commit 1859a1605eba068f3cf6c4614925a058b91494e8 Author: Pierre Laporte <pie...@pingtimeout.fr> AuthorDate: Thu Apr 10 18:21:04 2025 +0200 Add Polaris benchmarks (#2) * Add Polaris benchmarks * Code review * Use seeded random for buffered circular iterator * Remove print statement * Remove TableCommitsCreator simulation for now as it is not ready yet * Update benchmarks/README.md Co-authored-by: Robert Stupp <sn...@snazy.de> --------- Co-authored-by: Robert Stupp <sn...@snazy.de> --- benchmarks/.gitignore | 3 + benchmarks/.scalafmt.conf | 44 ++++ benchmarks/README.md | 242 +++++++++++++++++++ benchmarks/build.gradle.kts | 29 +++ .../gradle/wrapper/gradle-wrapper.properties | 8 + benchmarks/gradlew | 248 ++++++++++++++++++++ benchmarks/gradlew.bat | 92 ++++++++ benchmarks/settings.gradle.kts | 1 + .../src/gatling/resources/benchmark-defaults.conf | 143 ++++++++++++ benchmarks/src/gatling/resources/gatling.conf | 10 + benchmarks/src/gatling/resources/logback-test.xml | 39 ++++ .../polaris/benchmarks/NAryTreeBuilder.scala | 104 +++++++++ .../polaris/benchmarks/RetryOnHttpCodes.scala | 65 ++++++ .../benchmarks/actions/AuthenticationActions.scala | 100 ++++++++ .../benchmarks/actions/CatalogActions.scala | 135 +++++++++++ .../benchmarks/actions/EntityProperties.scala | 29 +++ .../benchmarks/actions/NamespaceActions.scala | 223 ++++++++++++++++++ .../polaris/benchmarks/actions/TableActions.scala | 258 +++++++++++++++++++++ .../polaris/benchmarks/actions/ViewActions.scala | 244 +++++++++++++++++++ .../benchmarks/parameters/BenchmarkConfig.scala | 73 ++++++ .../parameters/ConnectionParameters.scala | 37 +++ .../benchmarks/parameters/DatasetParameters.scala | 81 +++++++ .../benchmarks/parameters/WorkloadParameters.scala | 51 ++++ .../benchmarks/simulations/CreateTreeDataset.scala | 121 ++++++++++ .../simulations/CreateTreeDatasetConcurrent.scala | 57 +++++ .../simulations/CreateTreeDatasetSequential.scala | 50 ++++ .../benchmarks/simulations/ReadTreeDataset.scala | 143 ++++++++++++ .../simulations/ReadUpdateTreeDataset.scala | 110 +++++++++ .../ReadUpdateTreeDatasetConcurrent.scala | 41 ++++ .../ReadUpdateTreeDatasetSequential.scala | 41 ++++ .../polaris/benchmarks/util/CircularIterator.scala | 53 +++++ 31 files changed, 2875 insertions(+) diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 0000000..7ab3f43 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1,3 @@ +.gradle +build +bin/ diff --git a/benchmarks/.scalafmt.conf b/benchmarks/.scalafmt.conf new file mode 100644 index 0000000..f2f74c1 --- /dev/null +++ b/benchmarks/.scalafmt.conf @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Scalafmt is used to reformat Gatling benchmarks from the benchmarks/ directory + +version = 3.9.3 +runner.dialect = scala213 + +maxColumn = 100 + +preset = default +align.preset = some + +assumeStandardLibraryStripMargin = true +align.stripMargin = true + +rewrite.rules = [ + AvoidInfix + RedundantBraces + RedundantParens + SortModifiers + PreferCurlyFors + Imports +] + +rewrite.imports.sort = original +docstrings.style = Asterisk +docstrings.wrap = fold diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..f313084 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,242 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Polaris Benchmarks + +Benchmarks for the Polaris service using Gatling. + +## Available Benchmarks + +### Dataset Creation Benchmark + +The CreateTreeDataset benchmark creates a test dataset with a specific structure. It exists in two variants: + +- `org.apache.polaris.benchmarks.simulations.CreateTreeDatasetSequential`: Creates entities one at a time +- `org.apache.polaris.benchmarks.simulations.CreateTreeDatasetConcurrent`: Creates up to 50 entities simultaneously + +These are write-only workloads designed to populate the system for subsequent benchmarks. + +### Read/Update Benchmark + +The ReadUpdateTreeDataset benchmark tests read and update operations on an existing dataset. It exists in two variants: + +- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetSequential`: Performs read/update operations one at a time +- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetConcurrent`: Performs up to 20 read/update operations simultaneously + +These benchmarks can only be run after using CreateTreeDataset to populate the system. + +## Parameters + +All parameters are configured through the [benchmark-defaults.conf](src/gatling/resources/benchmark-defaults.conf) file located in `src/gatling/resources/`. The configuration uses the [Typesafe Config](https://github.com/lightbend/config) format. The reference configuration file contains default values as well as documentation for each parameter. 
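+
+As a rough sketch (the actual loading code lives in `BenchmarkConfig.scala` and may differ), a Typesafe Config consumer typically layers overrides on top of these bundled defaults:
+
+```scala
+import com.typesafe.config.{Config, ConfigFactory}
+
+// ConfigFactory.load() picks up -Dconfig.file / application.conf overrides;
+// the bundled defaults are used as a fallback for anything not overridden.
+val config: Config = ConfigFactory
+  .load()
+  .withFallback(ConfigFactory.parseResources("benchmark-defaults.conf"))
+  .resolve()
+
+val baseUrl: String = config.getString("http.base-url")
+val numCatalogs: Int = config.getInt("dataset.tree.num-catalogs")
+```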
+
+### Dataset Structure Parameters
+
+These parameters must be consistent across all benchmarks and are configured under `dataset.tree`:
+
+```hocon
+dataset.tree {
+  num-catalogs = 1          # Number of catalogs to create
+  namespace-width = 2       # Width of the namespace tree
+  namespace-depth = 4       # Depth of the namespace tree
+  tables-per-namespace = 5  # Tables per namespace
+  views-per-namespace = 3   # Views per namespace
+  columns-per-table = 10    # Columns per table
+  columns-per-view = 10     # Columns per view
+  default-base-location = "file:///tmp/polaris"  # Base location for datasets
+  namespace-properties = 10 # Number of properties to add to each namespace
+  table-properties = 10     # Number of properties to add to each table
+  view-properties = 10      # Number of properties to add to each view
+  max-tables = -1           # Cap on total tables (-1 for no cap). Must be less than N^(D-1) * tables-per-namespace
+  max-views = -1            # Cap on total views (-1 for no cap). Must be less than N^(D-1) * views-per-namespace
+}
+```
+
+### Connection Parameters
+
+Connection settings are configured under `http` and `auth`:
+
+```hocon
+http {
+  base-url = "http://localhost:8181"  # Service URL
+}
+
+auth {
+  client-id = null      # Required: OAuth2 client ID
+  client-secret = null  # Required: OAuth2 client secret
+}
+```
+
+### Workload Parameters
+
+Workload settings are configured under `workload`:
+
+```hocon
+workload {
+  read-write-ratio = 0.8  # Ratio of reads (0.0-1.0)
+}
+```
+
+## Running the Benchmarks
+
+The benchmark uses [typesafe-config](https://github.com/lightbend/config) for configuration management. Default settings are in `src/gatling/resources/benchmark-defaults.conf`. This file should not be modified directly.
+
+To customize the benchmark settings, create your own `application.conf` file and specify it using the `-Dconfig.file` parameter. Your settings will override the default values.
+
+Example `application.conf`:
+```hocon
+auth {
+  client-id = "your-client-id"
+  client-secret = "your-client-secret"
+}
+
+http {
+  base-url = "http://your-polaris-instance:8181"
+}
+
+workload {
+  read-write-ratio = 0.8
+}
+```
+
+Run benchmarks with your configuration:
+
+```bash
+# Sequential dataset creation
+./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateTreeDatasetSequential \
+  -Dconfig.file=./application.conf
+
+# Concurrent dataset creation
+./gradlew gatlingRun --simulation org.apache.polaris.benchmarks.simulations.CreateTreeDatasetConcurrent \
+  -Dconfig.file=./application.conf
+```
+
+A message will show the location of the Gatling report:
+```
+Reports generated in: ./benchmarks/build/reports/gatling/<simulation-name>/index.html
+```
+
+### Example Polaris server startup
+
+For repeated testing and benchmarking purposes, it is convenient to have fixed client-ID + client-secret combinations. **The following example is ONLY for testing and benchmarking against an airgapped Polaris instance.**
+
+```bash
+# Start Polaris with the fixed client-ID/secret admin/admin
+# NEVER, EVER USE THE FOLLOWING FOR ANY NON-AIRGAPPED POLARIS INSTANCE !!
+./gradlew :polaris-quarkus-server:quarkusBuild && java \
+  -Dpolaris.bootstrap.credentials=POLARIS,admin,admin \
+  -Djava.security.manager=allow \
+  -jar quarkus/server/build/quarkus-app/quarkus-run.jar
+```
+
+With the above, you can run the benchmarks using a configuration file with `client-id = "admin"` and `client-secret = "admin"`. This is meant only for convenience in a fully airgapped system.
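+
+For the airgapped example above, the matching override file is then simply (illustrative only, mirroring the fixed credentials):
+
+```hocon
+auth {
+  client-id = "admin"
+  client-secret = "admin"
+}
+```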
+
+# Test Dataset
+
+The benchmarks use synthetic procedural datasets that are generated deterministically at runtime. This means that given the same input parameters, the exact same dataset structure will always be generated. This approach allows generating large volumes of test data without having to store it, while ensuring reproducible benchmark results across different runs.
+
+The diagrams below describe the data sets used in the benchmarks. Note that the benchmark dataset may not cover all Polaris features.
+
+## Generation rules
+
+The dataset has a tree shape. At the root of the tree is a Polaris realm that must exist before the dataset is created.
+
+An arbitrary number of catalogs can be created under the realm. However, only the first catalog (`C_0`) is used for the rest of the dataset.
+
+The namespace part of the dataset is a complete `N`-ary tree. That is, it starts with a root namespace (`NS_0`), and each namespace contains exactly `0` or `N` child namespaces. Both the width and the depth of the namespace tree are configurable. The total number of namespaces can be calculated with the following formula, where `N` is the tree width and `D` is the total tree depth, including the root:
+
+$$\text{Total number of namespaces} =
+\begin{cases}
+  \frac{N^{D} - 1}{N - 1} & \mbox{if } N \gt 1 \\
+  D & \mbox{if } N = 1
+\end{cases}$$
+
+The tables are created under the leaves of the tree. That is, they are placed under the namespaces that have no child namespaces. The number of tables created under each leaf namespace is configurable. The total number of tables can be calculated with the following formula, where `N` is the tree width, `D` is the total tree depth, and `T` is the number of tables per leaf namespace:
+
+Total number of tables = *N*<sup>*D* − 1</sup> \* *T*
+
+The views are created alongside the tables. The number of views created under each leaf namespace is also configurable. The total number of views can be calculated with the following formula, where `N` is the tree width, `D` is the total tree depth, and `V` is the number of views per leaf namespace:
+
+Total number of views = *N*<sup>*D* − 1</sup> \* *V*
+
+## Binary tree example
+
+The diagram below shows an example of a test dataset with the following properties:
+
+- Number of catalogs: `3`
+- Namespace tree width (`N`): `2` (a binary tree)
+- Namespace tree depth (`D`): `3`
+- Tables per namespace (`T`): `5`
+- Views per namespace (`V`): `3`
+
+
+Using the formulas from the previous section, we can calculate the total number of namespaces and the total number of tables as follows:
+
+$$\text{Total number of namespaces} = \frac{2^{3} - 1}{2 - 1} = 7$$
+
+Total number of tables = 2<sup>3 − 1</sup> \* 5 = 20
+
+## 10-ary tree example
+
+The diagram below shows an example of a test dataset with the following properties:
+
+- Number of catalogs: `1`
+- Namespace tree width (`N`): `10`
+- Namespace tree depth (`D`): `2`
+- Tables per namespace (`T`): `3`
+- Views per namespace (`V`): `3`
+
+
+Using the formulas from the previous section, we can calculate the total number of namespaces and the total number of tables as follows:
+
+$$\text{Total number of namespaces} = \frac{10^{2} - 1}{10 - 1} = 11$$
+
+Total number of tables = 10<sup>2 − 1</sup> \* 3 = 30
+
+## 1-ary tree example
+
+The diagram below shows an example of a test dataset with the following properties:
+
+- Number of catalogs: `1`
+- Namespace tree width (`N`): `1`
+- Namespace tree depth (`D`): `1000`
+- Tables per namespace (`T`): `7`
+- Views per namespace (`V`): `4`
+
+
+Using the formulas from the previous section, we can calculate the total number of namespaces and the total number of tables as follows:
+
+Total number of namespaces = 1000
+
+Total number of tables = 1<sup>1000 − 1</sup> \* 7 = 7
+
+## Size
+
+The dataset size can be adjusted as well. Each namespace is associated with an arbitrary number of dummy properties. Similarly, each table is associated with an arbitrary number of dummy columns and properties.
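+
+Before launching a large run, it can help to compute the entity counts that a given configuration implies. A minimal Scala sketch (not part of the benchmark code) that mirrors the formulas above:
+
+```scala
+// N = namespace-width, D = namespace-depth (including the root)
+def totalNamespaces(n: Int, d: Int): Long =
+  if (n == 1) d else ((BigInt(n).pow(d) - 1) / (n - 1)).toLong
+
+// Tables and views are created only under the N^(D-1) leaf namespaces
+def leafNamespaces(n: Int, d: Int): Long = BigInt(n).pow(d - 1).toLong
+
+def totalTables(n: Int, d: Int, tablesPerNs: Int): Long =
+  leafNamespaces(n, d) * tablesPerNs
+
+// Binary tree example above: N = 2, D = 3, T = 5
+assert(totalNamespaces(2, 3) == 7)
+assert(totalTables(2, 3, 5) == 20)
+```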
+ +The diagram below shows sample catalog, namespace and table definition given the following properties: + +- Default base location: `file:///tmp/polaris` +- Number of namespace properties: `100` +- Number of columns per table: `999` +- Number of table properties: `59` + + diff --git a/benchmarks/build.gradle.kts b/benchmarks/build.gradle.kts new file mode 100644 index 0000000..3f68169 --- /dev/null +++ b/benchmarks/build.gradle.kts @@ -0,0 +1,29 @@ +plugins { + scala + id("io.gatling.gradle") version "3.13.5.2" + id("com.diffplug.spotless") version "7.0.2" +} + +description = "Polaris Iceberg REST API performance tests" + +tasks.withType<ScalaCompile> { + scalaCompileOptions.forkOptions.apply { + jvmArgs = listOf("-Xss100m") // Scala compiler may require a larger stack size when compiling Gatling simulations + } +} + +dependencies { + gatling("com.typesafe.play:play-json_2.13:2.9.4") + gatling("com.typesafe:config:1.4.3") +} + +repositories { + mavenCentral() +} + +spotless { + scala { + // Use scalafmt for Scala formatting + scalafmt("3.9.3").configFile(".scalafmt.conf") + } +} diff --git a/benchmarks/gradle/wrapper/gradle-wrapper.properties b/benchmarks/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..36e4933 --- /dev/null +++ b/benchmarks/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,8 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionSha256Sum=20f1b1176237254a6fc204d8434196fa11a4cfb387567519c61556e8710aed78 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/benchmarks/gradlew b/benchmarks/gradlew new file mode 100755 index 0000000..fcb6fca --- /dev/null +++ b/benchmarks/gradlew @@ -0,0 +1,248 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. 
+# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 
+ # shellcheck disable=SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/benchmarks/gradlew.bat b/benchmarks/gradlew.bat new file mode 100644 index 0000000..6689b85 --- /dev/null +++ b/benchmarks/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/benchmarks/settings.gradle.kts b/benchmarks/settings.gradle.kts new file mode 100644 index 0000000..912ad48 --- /dev/null +++ b/benchmarks/settings.gradle.kts @@ -0,0 +1 @@ +rootProject.name = "Apache Polaris Benchmarks" diff --git a/benchmarks/src/gatling/resources/benchmark-defaults.conf b/benchmarks/src/gatling/resources/benchmark-defaults.conf new file mode 100644 index 0000000..8aae51c --- /dev/null +++ b/benchmarks/src/gatling/resources/benchmark-defaults.conf @@ -0,0 +1,143 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# HTTP connection settings
+http {
+  # Base URL of the Polaris service
+  # Default: "http://localhost:8181"
+  base-url = "http://localhost:8181"
+}
+
+# Authentication settings
+auth {
+  # OAuth2 client ID for authentication
+  # Required: Must be provided in configuration
+  client-id = null
+
+  # OAuth2 client secret for authentication
+  # Required: Must be provided in configuration
+  client-secret = null
+}
+
+# Dataset tree structure configuration
+dataset.tree {
+  # Number of catalogs to create. Only the first catalog (C_0) will contain the test dataset.
+  # Default: 1
+  num-catalogs = 1
+
+  # Width of the namespace tree (N). Each namespace will have exactly N children.
+  # For N > 1: Total namespaces = (N^D - 1)/(N - 1)
+  # For N = 1: Total namespaces = D
+  # Default: 2
+  namespace-width = 2
+
+  # Depth of the namespace tree (D), including root namespace
+  # Total number of leaf namespaces = N^(D-1)
+  # Default: 4
+  namespace-depth = 4
+
+  # Number of tables to create per leaf namespace (T)
+  # Total tables = N^(D-1) * T
+  # Default: 5
+  tables-per-namespace = 5
+
+  # Maximum number of tables to create. This parameter can be used to cap the
+  # number of tables otherwise created by the formula above. Must be less than
+  # N^(D-1) * T (computed total from tables-per-namespace). Use -1 to create
+  # all computed tables.
+  #
+  # Examples:
+  #  * namespace-width=2, namespace-depth=20, tables-per-namespace=4 and max-tables=-1
+  #    will create 1,048,575 namespaces (of which 524,288 are leaf namespaces) and 2,097,152 tables.
+  #  * namespace-width=2, namespace-depth=20, tables-per-namespace=4 and max-tables=1000000
+  #    will create 1,048,575 namespaces (of which 524,288 are leaf namespaces) and 1,000,000 tables.
+  #
+  # Default: -1
+  max-tables = -1
+
+  # Number of views to create per leaf namespace (V)
+  # Total views = N^(D-1) * V
+  # Default: 3
+  views-per-namespace = 3
+
+  # Maximum number of views to create. This parameter can be used to cap the
+  # number of views otherwise created by the formula above. Must be less than
+  # N^(D-1) * V (computed total from views-per-namespace). Use -1 to create
+  # all computed views.
+  #
+  # Examples:
+  #  * namespace-width=2, namespace-depth=20, views-per-namespace=4 and max-views=-1
+  #    will create 1,048,575 namespaces (of which 524,288 are leaf namespaces) and 2,097,152 views.
+  #  * namespace-width=2, namespace-depth=20, views-per-namespace=4 and max-views=1000000
+  #    will create 1,048,575 namespaces (of which 524,288 are leaf namespaces) and 1,000,000 views.
+  #
+  # Default: -1
+  max-views = -1
+
+  # Number of columns per table
+  # Default: 10
+  columns-per-table = 10
+
+  # Number of columns per view
+  # Default: 10
+  columns-per-view = 10
+
+  # Base location for datasets
+  # Default: "file:///tmp/polaris"
+  default-base-location = "file:///tmp/polaris"
+
+  # Number of properties to add to each namespace
+  # Default: 10
+  namespace-properties = 10
+
+  # Number of properties to add to each table
+  # Default: 10
+  table-properties = 10
+
+  # Number of properties to add to each view
+  # Default: 10
+  view-properties = 10
+}
+
+# Workload configuration
+workload {
+  # Ratio of read operations to write operations
+  # Range: 0.0 to 1.0 where:
+  #  - 0.0 means 100% writes
+  #  - 1.0 means 100% reads
+  # Example: 0.8 means 80% reads and 20% writes
+  # Default: 0.5
+  read-write-ratio = 0.5
+
+  # Seed used for random number generation
+  # Default: 1
+  seed = 1
+
+  # Number of property updates to perform per individual namespace
+  # Default: 5
+  updates-per-namespace = 5
+
+  # Number of property updates to perform per individual table
+  # Default: 10
+  updates-per-table = 10
+
+  # Number of property updates to perform per individual view
+  # Default: 10
+  updates-per-view = 10
+}
diff --git a/benchmarks/src/gatling/resources/gatling.conf b/benchmarks/src/gatling/resources/gatling.conf
new file mode 100644
index 0000000..e3fc90f
--- /dev/null
+++ b/benchmarks/src/gatling/resources/gatling.conf
@@ -0,0 +1,10 @@
+gatling {
+  charting {
+    indicators {
+      percentile1 = 25
+      percentile2 = 50
+      percentile3 = 75
+      percentile4 = 99
+    }
+  }
+}
diff --git a/benchmarks/src/gatling/resources/logback-test.xml b/benchmarks/src/gatling/resources/logback-test.xml
new file mode 100644
index 0000000..c9227d3
--- /dev/null
+++ b/benchmarks/src/gatling/resources/logback-test.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+ +--> +<configuration> + <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> + <encoder> + <pattern>%d{HH:mm:ss.SSS} [%-5level] %logger{15} - %msg%n%rEx</pattern> + </encoder> + <immediateFlush>false</immediateFlush> + </appender> + + <!-- uncomment and set to DEBUG to log all failing HTTP requests --> + <!-- uncomment and set to TRACE to log all HTTP requests --> + <!-- <logger name="io.gatling.http.engine.response" level="DEBUG" />--> + + <logger name="org.apache.polaris.benchmarks" level="DEBUG"/> + + <root level="WARN"> + <appender-ref ref="CONSOLE"/> + </root> +</configuration> diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala new file mode 100644 index 0000000..bf9ed54 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks + +/** + * Builds a complete N-ary tree structure for organizing namespaces in synthetic datasets. + * + * This builder is used to create a hierarchical namespace structure where each non-leaf namespace + * has exactly `nsWidth` child namespaces. The tree structure is used to organize tables and views + * in a deterministic way. Tables and views are placed in the leaf namespaces of the tree. + * + * @param nsWidth The number of children each non-leaf namespace will have (N) + * @param nsDepth The total depth of the tree, including the root namespace + */ +case class NAryTreeBuilder(nsWidth: Int, nsDepth: Int) { + + /** + * Computes the path from the root node to the given ordinal. + * + * @param ordinal the ordinal of the node + * @param acc the accumulator for the path + * @return the path from the root node (included) to the given ordinal (included) + */ + @scala.annotation.tailrec + final def pathToRoot(ordinal: Int, acc: List[Int] = Nil): List[Int] = + if (ordinal == 0) { + ordinal :: acc + } else { + val parent = (ordinal - 1) / nsWidth + pathToRoot(parent, ordinal :: acc) + } + + /** + * Calculates the depth of a node in the n-ary tree based on its ordinal. + * + * @param ordinal The ordinal of the node. + * @return The depth of the node in the tree. + */ + def depthOf(ordinal: Int): Int = { + if (ordinal == 0) return 0 + if (nsWidth == 1) return ordinal + + // Using the formula: floor(log_n((x * (n-1)) + 1)) + val numerator = (ordinal * (nsWidth - 1)) + 1 + (math.log(numerator) / math.log(nsWidth)).floor.toInt + } + + /** + * Calculate the total number of nodes in a complete n-ary tree. + * + * @return The total number of nodes in the tree. 
+ */ + val numberOfNodes: Int = { + // The sum of nodes from level 0 to level d-1 is (n^(d+1) - 1) / (n - 1) if n > 1 + // Else, the sum of nodes from level 0 to level d-1 is d + if (nsWidth == 1) { + nsDepth + } else { + ((math.pow(nsWidth, nsDepth) - 1) / (nsWidth - 1)).toInt + } + } + + /** + * Returns a range of ordinals for the nodes on the last level of a complete n-ary tree. + * + * @return The range of ordinals for the nodes on the last level of the tree. + */ + val lastLevelOrdinals: Range = { + val lastLevel = nsDepth - 1 + if (nsWidth == 1) { + // For a unary tree, the only node at depth d is simply the node with ordinal d. + Range.inclusive(lastLevel, lastLevel) + } else { + // The sum of nodes from level 0 to level d-1 is (n^d - 1) / (n - 1) + val start = ((math.pow(nsWidth, lastLevel) - 1) / (nsWidth - 1)).toInt + // The sum of nodes from level 0 to level d is (n^(d+1) - 1) / (n - 1) + // Therefore, the last ordinal at depth d is: + val end = (((math.pow(nsWidth, lastLevel + 1) - 1) / (nsWidth - 1)).toInt) - 1 + Range.inclusive(start, end) + } + } + + val numberOfLastLevelElements: Int = { + val lastLevel = nsDepth - 1 + math.pow(nsWidth, lastLevel).toInt + } +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/RetryOnHttpCodes.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/RetryOnHttpCodes.scala new file mode 100644 index 0000000..9365615 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/RetryOnHttpCodes.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks + +import io.gatling.core.Predef._ +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import io.gatling.http.request.builder.HttpRequestBuilder +import org.slf4j.LoggerFactory + +object RetryOnHttpCodes { + private val logger = LoggerFactory.getLogger(getClass) + + implicit class HttpRequestBuilderWithStatusSave(val httpRequestBuilder: HttpRequestBuilder) { + def saveHttpStatusCode(): HttpRequestBuilder = + httpRequestBuilder.check(status.saveAs("lastHttpStatusCode")) + } + + def retryOnHttpStatus(maxRetries: Int, statusCodes: Set[Int], counterName: String = "httpRetry")( + httpRequestBuilder: HttpRequestBuilder + ): ChainBuilder = + exec(session => session.set(counterName, 0)) + .exec(session => session.set("lastHttpStatusCode", -1)) + .asLongAs(session => + session(counterName).as[Int] == 0 || ( + session(counterName).as[Int] < maxRetries && + statusCodes.contains(session("lastHttpStatusCode").as[Int]) + ) + ) { + exec(session => session.set(counterName, session(counterName).as[Int] + 1)) + .exec(httpRequestBuilder) + } + .doIf(session => + session(counterName).as[Int] >= maxRetries && statusCodes.contains( + session("lastHttpStatusCode").as[Int] + ) + ) { + exec { session => + logger.warn( + s"""Max retries (${maxRetries}) attempted for chain "${counterName}". Last HTTP status code: ${session( + "lastHttpStatusCode" + ).as[Int]}""" + ) + session + } + } + .exec(session => session.removeAll("lastHttpStatusCode", counterName)) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/AuthenticationActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/AuthenticationActions.scala new file mode 100644 index 0000000..a65d2b8 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/AuthenticationActions.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.ConnectionParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing authentication operations. This class provides methods to + * authenticate and manage access tokens for API requests. 
+ * + * @param cp Connection parameters containing client credentials + * @param accessToken Reference to the authentication token shared across actions + * @param maxRetries Maximum number of retry attempts for failed operations + * @param retryableHttpCodes HTTP status codes that should trigger a retry + */ +case class AuthenticationActions( + cp: ConnectionParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a Gatling Feeder that provides authentication credentials. The feeder continuously + * supplies client ID and client secret from the connection parameters for use in authentication + * requests. + * + * @return An iterator providing client credentials + */ + def feeder(): Feeder[String] = Iterator.continually( + Map( + "clientId" -> cp.clientId, + "clientSecret" -> cp.clientSecret + ) + ) + + /** + * Authenticates using client credentials and saves the access token as a session attribute. The + * credentials are defined in the [[AuthenticationActions.feeder]]. This operation performs an + * OAuth2 client credentials flow, requesting full principal roles, and stores the received access + * token in both the Gatling session and the shared AtomicReference. + * + * There is no limit to the maximum number of users that can authenticate concurrently. + */ + val authenticateAndSaveAccessToken: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Authenticate")( + http("Authenticate") + .post("/api/catalog/v1/oauth/tokens") + .header("Content-Type", "application/x-www-form-urlencoded") + .formParam("grant_type", "client_credentials") + .formParam("client_id", "#{clientId}") + .formParam("client_secret", "#{clientSecret}") + .formParam("scope", "PRINCIPAL_ROLE:ALL") + .saveHttpStatusCode() + .check(status.is(200)) + .check(jsonPath("$.access_token").saveAs("accessToken")) + ) + .exec { session => + if (session.contains("accessToken")) + accessToken.set(session("accessToken").as[String]) + session + } + + /** + * Restores the current access token from the shared reference into the Gatling session. This + * operation is useful when a scenario needs to reuse an authentication token from a previous + * scenario. + */ + val restoreAccessTokenInSession: ChainBuilder = + exec(session => session.set("accessToken", accessToken.get())) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/CatalogActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/CatalogActions.scala new file mode 100644 index 0000000..8878b5a --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/CatalogActions.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.DatasetParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing catalog operations in Apache Iceberg. This class provides methods + * to create and fetch catalogs. + * + * @param dp Dataset parameters controlling the dataset generation + * @param accessToken Reference to the authentication token for API requests + * @param maxRetries Maximum number of retry attempts for failed operations + * @param retryableHttpCodes HTTP status codes that should trigger a retry + */ +case class CatalogActions( + dp: DatasetParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(409, 500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a Gatling Feeder that generates catalog names and their default storage locations. Each + * catalog will be named "C_n" where n is a sequential number, and will have a corresponding + * storage location under the configured base path. + * + * @return An iterator providing catalog names and their storage locations + */ + def feeder(): Feeder[String] = Iterator + .from(0) + .map { i => + val catalogName = s"C_$i" + Map( + "catalogName" -> catalogName, + "defaultBaseLocation" -> s"${dp.defaultBaseLocation}/$catalogName" + ) + } + .take(dp.numCatalogs) + + /** + * Creates a new Iceberg catalog with FILE storage type. The catalog is created as an INTERNAL + * type with a name and a default base location that are defined in the [[CatalogActions.feeder]]. + * This represents the fundamental operation of establishing a new catalog in an Iceberg + * deployment. + * + * There is no limit to the number of users that can create catalogs concurrently. + */ + val createCatalog: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create catalog")( + http("Create Catalog") + .post("/api/management/v1/catalogs") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "catalog": { + | "type": "INTERNAL", + | "name": "#{catalogName}", + | "properties": { + | "default-base-location": "#{defaultBaseLocation}" + | }, + | "storageConfigInfo": { + | "storageType": "FILE" + | } + | } + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(201)) + ) + + /** + * Retrieves details of a specific Iceberg catalog by name. The catalog name is defined in the + * [[CatalogActions.feeder]]. Some basic properties are verified, like the catalog type, storage + * settings, and base location. + * + * There is no limit to the number of users that can fetch catalogs concurrently. 
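+   *
+   * Example composition (illustrative): feed catalog identities, then fetch each one:
+   * {{{
+   * scenario("Fetch catalogs")
+   *   .feed(catalogActions.feeder())
+   *   .exec(catalogActions.fetchCatalog)
+   * }}}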
+ */ + val fetchCatalog: ChainBuilder = exec( + http("Fetch Catalog") + .get("/api/management/v1/catalogs/#{catalogName}") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .check(status.is(200)) + .check(jsonPath("$.type").is("INTERNAL")) + .check(jsonPath("$.name").is("#{catalogName}")) + .check(jsonPath("$.properties.default-base-location").is("#{defaultBaseLocation}")) + .check(jsonPath("$.storageConfigInfo.storageType").is("FILE")) + .check(jsonPath("$.storageConfigInfo.allowedLocations[0]").is("#{defaultBaseLocation}")) + ) + + /** + * Lists all available Iceberg catalogs in the deployment. This operation does not rely on any + * feeder data. + */ + val fetchAllCatalogs: ChainBuilder = exec( + http("Fetch all Catalogs") + .get("/api/management/v1/catalogs") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .check(status.is(200)) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/EntityProperties.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/EntityProperties.scala new file mode 100644 index 0000000..962651d --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/EntityProperties.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import play.api.libs.json.Json + +object EntityProperties { + def filterMapByPrefix(json: String, prefix: String): Map[String, String] = + Json.parse(json).as[Map[String, String]].filter { case (k, _) => + k.startsWith(prefix) + } +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/NamespaceActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/NamespaceActions.scala new file mode 100644 index 0000000..89e0e41 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/NamespaceActions.scala @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.polaris.benchmarks.actions
+
+import io.gatling.core.Predef._
+import io.gatling.core.feeder.Feeder
+import io.gatling.core.structure.ChainBuilder
+import io.gatling.http.Predef._
+import org.apache.polaris.benchmarks.RetryOnHttpCodes.{
+  retryOnHttpStatus,
+  HttpRequestBuilderWithStatusSave
+}
+import org.apache.polaris.benchmarks.parameters.{DatasetParameters, WorkloadParameters}
+import org.slf4j.LoggerFactory
+import play.api.libs.json.Json
+
+import java.util.concurrent.atomic.AtomicReference
+
+/**
+ * Actions for performance testing namespace operations. This class provides methods to create,
+ * fetch, and update namespaces and their properties.
+ *
+ * @param dp Dataset parameters controlling the dataset generation
+ * @param wp Workload parameters controlling the workload generation
+ * @param accessToken Reference to the authentication token shared across actions
+ * @param maxRetries Maximum number of retry attempts for failed operations
+ * @param retryableHttpCodes HTTP status codes that should trigger a retry
+ */
+case class NamespaceActions(
+    dp: DatasetParameters,
+    wp: WorkloadParameters,
+    accessToken: AtomicReference[String],
+    maxRetries: Int = 10,
+    retryableHttpCodes: Set[Int] = Set(409, 500)
+) {
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  /**
+   * Creates a Gatling Feeder that generates namespace hierarchies. Each row is associated with a
+   * distinct namespace. Namespaces are named "NS_n" where n is derived from the n-ary tree
+   * position. The feeder provides the catalog name, namespace id and namespace path.
+   *
+   * @return An iterator providing namespace details
+   */
+  def namespaceIdentityFeeder(): Feeder[Any] = Iterator
+    .from(0)
+    .map { namespaceId =>
+      val namespacePath: Seq[String] = dp.nAryTree
+        .pathToRoot(namespaceId)
+        .map(ordinal => s"NS_$ordinal")
+      Map(
+        "catalogName" -> "C_0",
+        "namespaceId" -> namespaceId,
+        "namespacePath" -> namespacePath,
+        "namespaceJsonPath" -> Json.toJson(namespacePath).toString(),
+        "namespaceMultipartPath" -> namespacePath.mkString("%1F")
+      )
+    }
+    .take(dp.nAryTree.numberOfNodes)
+
+  /**
+   * Creates a Gatling Feeder that generates namespace hierarchies. Each row is associated with a
+   * distinct namespace and leverages the [[NamespaceActions.namespaceIdentityFeeder()]]. Additional
+   * attributes are added to each row, like namespace properties.
+   *
+   * @return An iterator providing namespace details and their properties
+   */
+  def namespaceCreationFeeder(): Feeder[Any] = namespaceIdentityFeeder()
+    .map { row =>
+      val properties: Map[String, String] = (0 until dp.numNamespaceProperties)
+        .map(id => s"InitialAttribute_$id" -> s"$id")
+        .toMap
+      row ++ Map(
+        "initialProperties" -> properties,
+        "initialJsonProperties" -> Json.toJson(properties).toString()
+      )
+    }
+
+  /**
+   * Creates a Gatling Feeder that generates expected namespace attributes. Each row is associated
+   * with a distinct namespace and leverages the [[NamespaceActions.namespaceCreationFeeder]].
+   * Additional attributes are added to each row so that the payload returned by the server can be
+   * verified. The initial properties from the namespace creation feeder are used to verify the
+   * namespace properties.
+ * + * @return An iterator providing namespace details and their properties + */ + def namespaceFetchFeeder(): Feeder[Any] = namespaceCreationFeeder() + .map { row => + val catalogName = row("catalogName").asInstanceOf[String] + val namespaceUnixPath = row("namespacePath").asInstanceOf[Seq[String]].mkString("/") + val location = Map( + "location" -> s"${dp.defaultBaseLocation}/$catalogName/$namespaceUnixPath" + ) + row ++ Map( + "location" -> location + ) + } + + def namespacePropertiesUpdateFeeder(): Feeder[Any] = namespaceIdentityFeeder() + .flatMap { row => + (0 until wp.updatesPerNamespace).map { updateId => + val updates = Map(s"UpdatedAttribute_$updateId" -> s"$updateId") + row ++ Map( + "jsonPropertyUpdates" -> Json.toJson(updates).toString() + ) + } + } + + /** + * Creates a new namespace in a specified catalog. The namespace is created with a full path and + * properties that are defined in the [[NamespaceActions.namespaceCreationFeeder]]. + * + * Namespaces have a dependency on the existence of their parent namespaces. As a result, the + * namespace creation operation is expected to fail if too many concurrent users are run. It is + * possible that a user tries to create a namespace for which the parent has not been fully + * created yet. + * + * Therefore, the number of concurrent users should start with 1 and increase gradually. + * Typically, start 1 user and increase by 1 user every second until some arbitrary maximum value. + */ + val createNamespace: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create namespace")( + http("Create Namespace") + .post("/api/catalog/v1/#{catalogName}/namespaces") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "namespace": #{namespaceJsonPath}, + | "properties": #{initialJsonProperties} + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200)) + .check(jsonPath("$.namespace").is("#{namespaceJsonPath}")) + ) + + /** + * Retrieves details of a specific namespace by its path. The namespace path and properties are + * verified against the values defined in the [[NamespaceActions.namespaceFetchFeeder]]. This + * operation validates the namespace existence and its configuration. + * + * There is no limit to the number of users that can fetch namespaces concurrently. + */ + val fetchNamespace: ChainBuilder = exec( + http("Fetch Namespace") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{namespaceMultipartPath}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + .check(jsonPath("$.namespace").is("#{namespaceJsonPath}")) + .check( + jsonPath("$.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "InitialAttribute_")) + .is("#{initialProperties}") + ) + .check( + jsonPath("$.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "location")) + .is("#{location}") + ) + ) + + /** + * Checks if a specific namespace exists by its path. + */ + val checkNamespaceExists: ChainBuilder = exec( + http("Check Namespace Exists") + .head("/api/catalog/v1/#{catalogName}/namespaces/#{namespaceMultipartPath}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(204)) + ) + + /** + * Lists all child namespaces under a specific parent namespace. This operation retrieves the + * immediate children of a given namespace, supporting the hierarchical nature of the namespace + * structure. 
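+   *
+   * For example (hypothetical values), listing the children of the root namespace of
+   * catalog C_0 issues: GET /api/catalog/v1/C_0/namespaces?parent=NS_0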
+   */
+  val fetchAllChildrenNamespaces: ChainBuilder = exec(
+    http("Fetch all Namespaces under specific parent")
+      .get("/api/catalog/v1/#{catalogName}/namespaces?parent=#{namespaceMultipartPath}")
+      .header("Authorization", "Bearer #{accessToken}")
+      .check(status.is(200))
+  )
+
+  /**
+   * Updates the properties of a specific namespace by its path. The namespace properties are
+   * updated with new values defined in the [[NamespaceActions.namespacePropertiesUpdateFeeder]].
+   */
+  val updateNamespaceProperties: ChainBuilder =
+    retryOnHttpStatus(maxRetries, retryableHttpCodes, "Update namespace properties")(
+      http("Update Namespace Properties")
+        .post("/api/catalog/v1/#{catalogName}/namespaces/#{namespaceMultipartPath}/properties")
+        .header("Authorization", "Bearer #{accessToken}")
+        .header("Content-Type", "application/json")
+        .body(
+          StringBody(
+            """{
+              |  "removals": [],
+              |  "updates": #{jsonPropertyUpdates}
+              |}""".stripMargin
+          )
+        )
+        .saveHttpStatusCode()
+        .check(status.is(200))
+    )
+}
diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/TableActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/TableActions.scala
new file mode 100644
index 0000000..7c0901e
--- /dev/null
+++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/TableActions.scala
@@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.polaris.benchmarks.actions
+
+import io.gatling.core.Predef._
+import io.gatling.core.feeder.Feeder
+import io.gatling.core.structure.ChainBuilder
+import io.gatling.http.Predef._
+import org.apache.polaris.benchmarks.RetryOnHttpCodes.{
+  retryOnHttpStatus,
+  HttpRequestBuilderWithStatusSave
+}
+import org.apache.polaris.benchmarks.parameters.{DatasetParameters, WorkloadParameters}
+import org.slf4j.LoggerFactory
+import play.api.libs.json.Format.GenericFormat
+import play.api.libs.json.OFormat.oFormatFromReadsAndOWrites
+import play.api.libs.json.{Format, Json}
+
+import java.util.concurrent.atomic.AtomicReference
+
+/**
+ * Actions for performance testing table operations. This class provides methods to create and
+ * manage tables within namespaces.
+ *
+ * @param dp Dataset parameters controlling the dataset generation
+ * @param wp Workload parameters controlling the workload configuration
+ * @param accessToken Reference to the authentication token shared across actions
+ * @param maxRetries Maximum number of retry attempts for failed operations
+ * @param retryableHttpCodes HTTP status codes that should trigger a retry
+ */
+case class TableActions(
+    dp: DatasetParameters,
+    wp: WorkloadParameters,
+    accessToken: AtomicReference[String],
+    maxRetries: Int = 10,
+    retryableHttpCodes: Set[Int] = Set(409, 500)
+) {
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  /**
+   * Creates a base Gatling Feeder that generates table identities.
+   * Each row contains the basic information needed to identify a table: catalog name, namespace
+   * path, and table name. Tables are named "T_n" where n is derived from the namespace and table
+   * position.
+   *
+   * @return An iterator providing table identity details
+   */
+  def tableIdentityFeeder(): Feeder[Any] = dp.nAryTree.lastLevelOrdinals.iterator
+    .flatMap { namespaceId =>
+      val positionInLevel = namespaceId - dp.nAryTree.lastLevelOrdinals.head
+      val parentNamespacePath: Seq[String] = dp.nAryTree
+        .pathToRoot(namespaceId)
+        .map(ordinal => s"NS_$ordinal")
+      Range(0, dp.numTablesPerNs)
+        .map { j =>
+          val tableId = positionInLevel * dp.numTablesPerNs + j
+          Map(
+            "catalogName" -> "C_0",
+            "parentNamespacePath" -> parentNamespacePath,
+            "multipartNamespace" -> parentNamespacePath.mkString("%1F"),
+            "tableName" -> s"T_$tableId"
+          )
+        }
+    }
+    .take(dp.numTables)
+
+  /**
+   * Creates a Gatling Feeder that generates table creation details. Each row builds upon the table
+   * identity information of [[TableActions.tableIdentityFeeder]] and adds schema and property
+   * details needed for table creation.
+   *
+   * @return An iterator providing table creation details
+   */
+  def tableCreationFeeder(): Feeder[Any] = tableIdentityFeeder()
+    .map { row =>
+      // The field identifiers start at 1 because if they start at 0, they will be overwritten by Iceberg.
+      // See https://github.com/apache/iceberg/issues/10084
+      val fields: Seq[TableField] = (1 to dp.numColumnsPerTable)
+        .map(id => TableField(id = id, name = s"column$id", `type` = "int", required = true))
+      val properties: Map[String, String] = (0 until dp.numTableProperties)
+        .map(id => s"InitialAttribute_$id" -> s"$id")
+        .toMap
+      row ++ Map(
+        "schemasType" -> "struct",
+        "schemasIdentifierFieldIds" -> "[1]",
+        "fieldsStr" -> Json.toJson(fields).toString(),
+        "fields" -> fields,
+        "initialJsonProperties" -> Json.toJson(properties).toString()
+      )
+    }
+
+  /**
+   * Creates a Gatling Feeder that generates table property updates. Each row contains a single
+   * property update targeting a specific table.
+   *
+   * @return An iterator providing table property update details
+   */
+  def propertyUpdateFeeder(): Feeder[Any] = tableIdentityFeeder()
+    .flatMap(row =>
+      Range(0, wp.updatesPerTable)
+        .map(k => row + ("newProperty" -> s"""{"NewAttribute_$k": "NewValue_$k"}"""))
+    )
+
+  /**
+   * Creates a Gatling Feeder that generates table details. Each row builds upon the table identity
+   * information and adds the expected location and properties for fetch verification. The details
+   * should be used to verify the table location and properties after creation.
+   *
+   * @return An iterator providing table fetch details with expected response values
+   */
+  def tableFetchFeeder(): Feeder[Any] = tableIdentityFeeder()
+    .map { row =>
+      val catalogName: String = row("catalogName").asInstanceOf[String]
+      val parentNamespacePath: Seq[String] = row("parentNamespacePath").asInstanceOf[Seq[String]]
+      val tableName: String = row("tableName").asInstanceOf[String]
+      val initialProperties: Map[String, String] = (0 until dp.numTableProperties)
+        .map(id => s"InitialAttribute_$id" -> s"$id")
+        .toMap
+      row ++ Map(
+        "initialProperties" -> initialProperties,
+        "location" -> s"${dp.defaultBaseLocation}/$catalogName/${parentNamespacePath.mkString("/")}/$tableName"
+      )
+    }
+
+  /**
+   * Creates a new table in a specified namespace. The table is created with a name, schema
+   * definition, and properties that are defined in the [[TableActions.tableCreationFeeder]].
The + * operation includes retry logic for handling transient failures. + * + * There is no limit to the number of users that can create tables concurrently. + */ + val createTable: ChainBuilder = retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create table")( + http("Create Table") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "name": "#{tableName}", + | "stage-create": false, + | "schema": { + | "type": "#{schemasType}", + | "fields": #{fieldsStr}, + | "identifier-field-ids": #{schemasIdentifierFieldIds} + | }, + | "properties": #{initialJsonProperties} + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200)) + ) + + /** + * Retrieves details of a specific table by its name and namespace path. The table location, + * schema, and properties are verified against the values defined in the + * [[TableActions.tableFetchFeeder]]. This operation validates the table existence and its + * configuration. + * + * There is no limit to the number of users that can fetch tables concurrently. + */ + val fetchTable: ChainBuilder = exec( + http("Fetch Table") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables/#{tableName}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + .check(jsonPath("$.metadata.table-uuid").saveAs("tableUuid")) + .check(jsonPath("$.metadata.location").is("#{location}")) + .check( + jsonPath("$.metadata.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "InitialAttribute_")) + .is("#{initialProperties}") + ) + ) + + /** + * Checks if a specific table exists by its name and namespace path. + */ + val checkTableExists: ChainBuilder = exec( + http("Check Table Exists") + .head("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables/#{tableName}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(204)) + ) + + /** + * Lists all tables under a specific namespace. This operation retrieves all tables within the + * given namespace, supporting bulk retrieval of table metadata. + */ + val fetchAllTables: ChainBuilder = exec( + http("Fetch all Tables under parent namespace") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + ) + + /** + * Updates the properties of a specific table by its name and namespace path. The table properties + * are updated with new values defined in the [[TableActions.propertyUpdateFeeder]]. + * + * There is no limit to the number of users that can update table properties concurrently. + */ + val updateTable: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Update table metadata")( + http("Update table metadata") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables/#{tableName}") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + s"""{ + | "updates": [{ + | "action": "set-properties", + | "updates": #{newProperty} + | }] + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200).saveAs("lastStatus")) + ) +} + +/** + * This object provides JSON serialization for the table field schema so that it can be used in + * Gatling response checks. 
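+ *
+ * A minimal round-trip sketch:
+ * {{{
+ * val json   = Json.toJson(Seq(TableField(1, "column1", "int", required = true))).toString()
+ * val fields = TableField.fromList(json)
+ * }}}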
+ */
+object TableField {
+  implicit val format: Format[TableField] = Json.format[TableField]
+
+  def fromList(json: String): Seq[TableField] = Json.parse(json).as[Seq[TableField]]
+}
+
+/**
+ * A case class representing a table field schema.
+ * @param id Field identifier
+ * @param name Field name
+ * @param `type` Field type
+ * @param required Whether the field is required
+ */
+case class TableField(id: Int, name: String, `type`: String, required: Boolean)
diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ViewActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ViewActions.scala
new file mode 100644
index 0000000..812c161
--- /dev/null
+++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ViewActions.scala
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.polaris.benchmarks.actions
+
+import io.gatling.core.Predef._
+import io.gatling.core.feeder.Feeder
+import io.gatling.core.structure.ChainBuilder
+import io.gatling.http.Predef._
+import org.apache.polaris.benchmarks.RetryOnHttpCodes.{
+  retryOnHttpStatus,
+  HttpRequestBuilderWithStatusSave
+}
+import org.apache.polaris.benchmarks.parameters.{DatasetParameters, WorkloadParameters}
+import org.slf4j.LoggerFactory
+import play.api.libs.json.{Format, Json}
+
+import java.time.Instant
+import java.util.concurrent.atomic.AtomicReference
+
+/**
+ * Actions for performance testing view operations. This class provides methods to create and
+ * manage views within namespaces.
+ *
+ * @param dp Dataset parameters controlling the dataset generation
+ * @param wp Workload parameters controlling the workload configuration
+ * @param accessToken Reference to the authentication token shared across actions
+ * @param maxRetries Maximum number of retry attempts for failed operations
+ * @param retryableHttpCodes HTTP status codes that should trigger a retry
+ */
+case class ViewActions(
+    dp: DatasetParameters,
+    wp: WorkloadParameters,
+    accessToken: AtomicReference[String],
+    maxRetries: Int = 10,
+    retryableHttpCodes: Set[Int] = Set(409, 500)
+) {
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  /**
+   * Creates a base Gatling Feeder that generates view identities. Each row contains the basic
+   * information needed to identify a view: catalog name, namespace path, and view name.
+   *
+   * @return An iterator providing view identity details
+   */
+  def viewIdentityFeeder(): Feeder[Any] = dp.nAryTree.lastLevelOrdinals.iterator
+    .flatMap { namespaceId =>
+      val catalogId = 0
+      val parentNamespacePath: Seq[String] = dp.nAryTree
+        .pathToRoot(namespaceId)
+        .map(ordinal => s"NS_$ordinal")
+      val positionInLevel = namespaceId - dp.nAryTree.lastLevelOrdinals.head
+
+      Range(0, dp.numViewsPerNs)
+        .map { j =>
+          // Ensure the view ID matches that of the associated table
+          val viewId = positionInLevel * dp.numTablesPerNs + j
+          Map(
+            "catalogName" -> s"C_$catalogId",
+            "parentNamespacePath" -> parentNamespacePath,
+            "multipartNamespace" -> parentNamespacePath.mkString("%1F"),
+            "viewName" -> s"V_$viewId",
+            "viewId" -> viewId
+          )
+        }
+    }
+    .take(dp.numViews)
+
+  /**
+   * Creates a Gatling Feeder that generates view creation details.
+   * Each row builds upon the view identity information and adds schema and query details needed
+   * for view creation.
+   *
+   * @return An iterator providing view creation details
+   */
+  def viewCreationFeeder(): Feeder[Any] = viewIdentityFeeder()
+    .map { row =>
+      val viewId = row("viewId").asInstanceOf[Int]
+      val tableName = s"T_$viewId"
+      val fields: Seq[ViewField] = (1 to dp.numColumnsPerView)
+        .map(id => ViewField(id = id, name = s"column$id", `type` = "int", required = true))
+      val properties: Map[String, String] = (0 until dp.numViewProperties)
+        .map(id => s"InitialAttribute_$id" -> s"$id")
+        .toMap
+      row ++ Map(
+        "tableName" -> tableName, // Reference the table at the same index as the view
+        "timestamp" -> Instant.now().toEpochMilli.toString,
+        "fieldsStr" -> Json.toJson(fields).toString(),
+        "fields" -> fields,
+        "sqlQuery" -> s"SELECT * FROM $tableName",
+        "initialJsonProperties" -> Json.toJson(properties).toString()
+      )
+    }
+
+  /**
+   * Creates a Gatling Feeder that generates view property updates. Each row contains a single
+   * property update targeting a specific view.
+   *
+   * @return An iterator providing view property update details
+   */
+  def propertyUpdateFeeder(): Feeder[Any] = viewIdentityFeeder()
+    .flatMap(row =>
+      Range(0, wp.updatesPerView)
+        .map(k => row + ("newProperty" -> s"""{"NewAttribute_$k": "NewValue_$k"}"""))
+    )
+
+  /**
+   * Creates a Gatling Feeder that generates view details. Each row builds upon the view creation
+   * information and adds the expected location and properties for fetch verification. The details
+   * should be used to verify the view location and properties after creation.
+   *
+   * @return An iterator providing view fetch details with expected response values
+   */
+  def viewFetchFeeder(): Feeder[Any] = viewCreationFeeder()
+    .map { row =>
+      val catalogName: String = row("catalogName").asInstanceOf[String]
+      val parentNamespacePath: Seq[String] = row("parentNamespacePath").asInstanceOf[Seq[String]]
+      val viewName: String = row("viewName").asInstanceOf[String]
+      val initialProperties: Map[String, String] = (0 until dp.numViewProperties)
+        .map(id => s"InitialAttribute_$id" -> s"$id")
+        .toMap
+      row ++ Map(
+        "initialProperties" -> initialProperties,
+        "location" -> s"${dp.defaultBaseLocation}/$catalogName/${parentNamespacePath.mkString("/")}/$viewName"
+      )
+    }
+
+  /**
+   * Creates a new view in a specified namespace. The view is created with a name, schema
+   * definition, SQL representation, and properties that are defined in the
+   * [[ViewActions.viewCreationFeeder]].
+   */
+  val createView: ChainBuilder = retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create view")(
+    http("Create View")
+      .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views")
+      .header("Authorization", "Bearer #{accessToken}")
+      .header("Content-Type", "application/json")
+      .body(StringBody("""{
+        |  "name": "#{viewName}",
+        |  "view-version": {
+        |    "version-id": 1,
+        |    "timestamp-ms": #{timestamp},
+        |    "schema-id": 0,
+        |    "summary": {
+        |      "engine-version": "3.5.5",
+        |      "app-id": "gatling-#{timestamp}",
+        |      "engine-name": "spark",
+        |      "iceberg-version": "Apache Iceberg 1.7.0"
+        |    },
+        |    "default-namespace": ["#{multipartNamespace}"],
+        |    "representations": [
+        |      {
+        |        "type": "sql",
+        |        "sql": "#{sqlQuery}",
+        |        "dialect": "spark"
+        |      }
+        |    ]
+        |  },
+        |  "schema": {
+        |    "type": "struct",
+        |    "schema-id": 0,
+        |    "fields": #{fieldsStr}
+        |  },
+        |  "properties": #{initialJsonProperties}
+        |}""".stripMargin))
+      .check(status.is(200))
+  )
+
+  /**
+   * Retrieves details of a specific view by its name and namespace path. The view location and
+   * properties are verified against the values defined in the [[ViewActions.viewFetchFeeder]].
+   *
+   * There is no limit to the number of users that can fetch views concurrently.
+   */
+  val fetchView: ChainBuilder = exec(
+    http("Fetch View")
+      .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views/#{viewName}")
+      .header("Authorization", "Bearer #{accessToken}")
+      .check(status.is(200))
+      .check(jsonPath("$.metadata.view-uuid").saveAs("viewUuid"))
+      .check(jsonPath("$.metadata.location").is("#{location}"))
+      .check(
+        jsonPath("$.metadata.properties")
+          .transform(str => EntityProperties.filterMapByPrefix(str, "InitialAttribute_"))
+          .is("#{initialProperties}")
+      )
+  )
+
+  /**
+   * Checks if a specific view exists by its name and namespace path.
+   */
+  val checkViewExists: ChainBuilder = exec(
+    http("Check View Exists")
+      .head("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views/#{viewName}")
+      .header("Authorization", "Bearer #{accessToken}")
+      .check(status.is(204))
+  )
+
+  /**
+   * Lists all views under a specific namespace. This operation retrieves all views within the
+   * given namespace, supporting bulk retrieval of view metadata.
+   */
+  val fetchAllViews: ChainBuilder = exec(
+    http("Fetch all Views under parent namespace")
+      .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views")
+      .header("Authorization", "Bearer #{accessToken}")
+      .check(status.is(200))
+  )
+
+  /**
+   * Updates the properties of a specific view by its name and namespace path. The view properties
+   * are updated with new values defined in the [[ViewActions.propertyUpdateFeeder]].
+   *
+   * There is no limit to the number of users that can update view properties concurrently.
+   */
+  val updateView: ChainBuilder =
+    retryOnHttpStatus(maxRetries, retryableHttpCodes, "Update View metadata")(
+      http("Update View metadata")
+        .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views/#{viewName}")
+        .header("Authorization", "Bearer #{accessToken}")
+        .header("Content-Type", "application/json")
+        .body(
+          StringBody(
+            s"""{
+               |  "updates": [{
+               |    "action": "set-properties",
+               |    "updates": #{newProperty}
+               |  }]
+               |}""".stripMargin
+          )
+        )
+        .saveHttpStatusCode()
+        .check(status.is(200).saveAs("lastStatus"))
+    )
+}
+
+/**
+ * This object provides JSON serialization for the view field schema so that it can be used in
+ * Gatling response checks.
+ */
+object ViewField {
+  implicit val format: Format[ViewField] = Json.format[ViewField]
+
+  def fromList(json: String): Seq[ViewField] = Json.parse(json).as[Seq[ViewField]]
+}
+
+/**
+ * A case class representing a view field schema.
+ * @param id Field identifier
+ * @param name Field name
+ * @param `type` Field type
+ * @param required Whether the field is required
+ */
+case class ViewField(id: Int, name: String, `type`: String, required: Boolean)
diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala
new file mode 100644
index 0000000..3799965
--- /dev/null
+++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/BenchmarkConfig.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.polaris.benchmarks.parameters + +import com.typesafe.config.{Config, ConfigFactory} + +object BenchmarkConfig { + val config: BenchmarkConfig = apply() + + def apply(): BenchmarkConfig = { + val config: Config = ConfigFactory.load().withFallback(ConfigFactory.load("benchmark-defaults")) + + val http: Config = config.getConfig("http") + val auth: Config = config.getConfig("auth") + val dataset: Config = config.getConfig("dataset.tree") + val workload: Config = config.getConfig("workload") + + val connectionParams = ConnectionParameters( + auth.getString("client-id"), + auth.getString("client-secret"), + http.getString("base-url") + ) + + val workloadParams = WorkloadParameters( + workload.getDouble("read-write-ratio"), + workload.getInt("updates-per-namespace"), + workload.getInt("updates-per-table"), + workload.getInt("updates-per-view"), + workload.getLong("seed") + ) + + val datasetParams = DatasetParameters( + dataset.getInt("num-catalogs"), + dataset.getString("default-base-location"), + dataset.getInt("namespace-width"), + dataset.getInt("namespace-depth"), + dataset.getInt("namespace-properties"), + dataset.getInt("tables-per-namespace"), + dataset.getInt("max-tables"), + dataset.getInt("columns-per-table"), + dataset.getInt("table-properties"), + dataset.getInt("views-per-namespace"), + dataset.getInt("max-views"), + dataset.getInt("columns-per-view"), + dataset.getInt("view-properties") + ) + + BenchmarkConfig(connectionParams, workloadParams, datasetParams) + } +} + +case class BenchmarkConfig( + connectionParameters: ConnectionParameters, + workloadParameters: WorkloadParameters, + datasetParameters: DatasetParameters +) {} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ConnectionParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ConnectionParameters.scala new file mode 100644 index 0000000..67c8cc1 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ConnectionParameters.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.parameters + +/** + * Case class to hold the connection parameters for the benchmark. + * + * @param clientId The client ID for authentication. + * @param clientSecret The client secret for authentication. + * @param baseUrl The base URL of the Polaris service. 
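+ *
+ * Example (illustrative values only):
+ * {{{
+ * ConnectionParameters("root", "s3cr3t", "http://localhost:8181")
+ * }}}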
+ */
+case class ConnectionParameters(clientId: String, clientSecret: String, baseUrl: String) {
+  require(clientId != null && clientId.nonEmpty, "Client ID cannot be null or empty")
+  require(clientSecret != null && clientSecret.nonEmpty, "Client secret cannot be null or empty")
+  require(baseUrl != null && baseUrl.nonEmpty, "Base URL cannot be null or empty")
+  require(
+    baseUrl.startsWith("http://") || baseUrl.startsWith("https://"),
+    "Base URL must start with http:// or https://"
+  )
+}
diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/DatasetParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/DatasetParameters.scala
new file mode 100644
index 0000000..894cee2
--- /dev/null
+++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/DatasetParameters.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.polaris.benchmarks.parameters
+
+import org.apache.polaris.benchmarks.NAryTreeBuilder
+
+/**
+ * Case class to hold the dataset parameters for the benchmark.
+ *
+ * @param numCatalogs The number of catalogs to create.
+ * @param defaultBaseLocation The default base location for the datasets.
+ * @param nsWidth The width of the namespace n-ary tree.
+ * @param nsDepth The depth of the namespace n-ary tree.
+ * @param numNamespaceProperties The number of namespace properties to create.
+ * @param numTablesPerNs The number of tables per namespace to create.
+ * @param numTablesMax The maximum number of tables to create. If set to -1 (or any non-positive
+ *                     value), all tables are created.
+ * @param numColumnsPerTable The number of columns per table to create.
+ * @param numTableProperties The number of table properties to create.
+ * @param numViewsPerNs The number of views per namespace to create.
+ * @param numViewsMax The maximum number of views to create. If set to -1 (or any non-positive
+ *                    value), all views are created.
+ * @param numColumnsPerView The number of columns per view to create.
+ * @param numViewProperties The number of view properties to create.
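+ *
+ * For example (illustrative numbers): with 4 last-level namespaces and 5 tables per namespace,
+ * at most 4 * 5 = 20 tables are created.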
+ */ +case class DatasetParameters( + numCatalogs: Int, + defaultBaseLocation: String, + nsWidth: Int, + nsDepth: Int, + numNamespaceProperties: Int, + numTablesPerNs: Int, + numTablesMax: Int, + numColumnsPerTable: Int, + numTableProperties: Int, + numViewsPerNs: Int, + numViewsMax: Int, + numColumnsPerView: Int, + numViewProperties: Int +) { + val nAryTree: NAryTreeBuilder = NAryTreeBuilder(nsWidth, nsDepth) + private val maxPossibleTables = nAryTree.numberOfLastLevelElements * numTablesPerNs + private val maxPossibleViews = nAryTree.numberOfLastLevelElements * numViewsPerNs + val numTables: Int = if (numTablesMax <= 0) maxPossibleTables else numTablesMax + val numViews: Int = if (numViewsMax <= 0) maxPossibleViews else numViewsMax + + require(numCatalogs > 0, "Number of catalogs must be positive") + require(defaultBaseLocation.nonEmpty, "Base location cannot be empty") + require(nsWidth > 0, "Namespace width must be positive") + require(nsDepth > 0, "Namespace depth must be positive") + require(numNamespaceProperties >= 0, "Number of namespace properties cannot be negative") + require(numTablesPerNs >= 0, "Number of tables per namespace cannot be negative") + require(numColumnsPerTable > 0, "Number of columns per table must be positive") + require(numTableProperties >= 0, "Number of table properties cannot be negative") + require(numViewsPerNs >= 0, "Number of views per namespace cannot be negative") + require(numColumnsPerView > 0, "Number of columns per view must be positive") + require(numViewProperties >= 0, "Number of view properties cannot be negative") + require( + numTablesMax == -1 || numTablesMax <= maxPossibleTables, + s"Maximum number of tables ($numTablesMax) cannot exceed computed total ($maxPossibleTables)" + ) + require( + numViewsMax == -1 || numViewsMax <= maxPossibleViews, + s"Maximum number of views ($numViewsMax) cannot exceed computed total ($maxPossibleViews)" + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala new file mode 100644 index 0000000..6831f44 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.polaris.benchmarks.parameters
+
+/**
+ * Case class to hold the workload parameters for the benchmark.
+ *
+ * @param readWriteRatio The fraction of read operations in the workload, between 0.0 and 1.0.
+ * @param updatesPerNamespace The number of property updates to generate per namespace.
+ * @param updatesPerTable The number of property updates to generate per table.
+ * @param updatesPerView The number of property updates to generate per view.
+ * @param seed The seed used to make randomized behavior reproducible.
+ */
+case class WorkloadParameters(
+    readWriteRatio: Double,
+    updatesPerNamespace: Int,
+    updatesPerTable: Int,
+    updatesPerView: Int,
+    seed: Long
+) {
+  require(
+    readWriteRatio >= 0.0 && readWriteRatio <= 1.0,
+    "Read/write ratio must be between 0.0 and 1.0 inclusive"
+  )
+
+  require(
+    updatesPerNamespace >= 0,
+    "Updates per namespace must be non-negative"
+  )
+
+  require(
+    updatesPerTable >= 0,
+    "Updates per table must be non-negative"
+  )
+
+  require(
+    updatesPerView >= 0,
+    "Updates per view must be non-negative"
+  )
+
+  val gatlingReadRatio: Double = readWriteRatio * 100
+  val gatlingWriteRatio: Double = (1 - readWriteRatio) * 100
+}
diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala
new file mode 100644
index 0000000..cd01faf
--- /dev/null
+++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.polaris.benchmarks.simulations
+
+import io.gatling.core.Predef._
+import io.gatling.core.structure.ScenarioBuilder
+import org.apache.polaris.benchmarks.actions._
+import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config
+import org.apache.polaris.benchmarks.parameters.{
+  ConnectionParameters,
+  DatasetParameters,
+  WorkloadParameters
+}
+import org.slf4j.LoggerFactory
+
+import java.util.concurrent.atomic.{AtomicInteger, AtomicReference}
+
+/**
+ * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended
+ * to be used against an empty Polaris instance.
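+ *
+ * Entities are created in dependency order: catalogs first, then namespaces, then tables and
+ * views. The concrete injection profiles are supplied by the concurrent and sequential
+ * subclasses.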
+ */ +class CreateTreeDataset extends Simulation { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Load parameters + // -------------------------------------------------------------------------------- + val cp: ConnectionParameters = config.connectionParameters + val dp: DatasetParameters = config.datasetParameters + val wp: WorkloadParameters = config.workloadParameters + + // -------------------------------------------------------------------------------- + // Helper values + // -------------------------------------------------------------------------------- + private val numNamespaces: Int = dp.nAryTree.numberOfNodes + private val accessToken: AtomicReference[String] = new AtomicReference() + + private val authenticationActions = AuthenticationActions(cp, accessToken, 5, Set(500)) + private val catalogActions = CatalogActions(dp, accessToken, 0, Set()) + private val namespaceActions = NamespaceActions(dp, wp, accessToken, 5, Set(500)) + private val tableActions = TableActions(dp, wp, accessToken, 0, Set()) + private val viewActions = ViewActions(dp, wp, accessToken, 0, Set()) + + private val createdCatalogs = new AtomicInteger() + private val createdNamespaces = new AtomicInteger() + private val createdTables = new AtomicInteger() + private val createdViews = new AtomicInteger() + + // -------------------------------------------------------------------------------- + // Workload: Authenticate and store the access token for later use + // -------------------------------------------------------------------------------- + val authenticate: ScenarioBuilder = scenario("Authenticate using the OAuth2 REST API endpoint") + .feed(authenticationActions.feeder()) + .exec(authenticationActions.authenticateAndSaveAccessToken) + + // -------------------------------------------------------------------------------- + // Workload: Create catalogs + // -------------------------------------------------------------------------------- + val createCatalogs: ScenarioBuilder = + scenario("Create catalogs using the Polaris Management REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdCatalogs.getAndIncrement() < dp.numCatalogs && session.contains("accessToken") + )( + feed(catalogActions.feeder()) + .exec(catalogActions.createCatalog) + ) + + // -------------------------------------------------------------------------------- + // Workload: Create namespaces + // -------------------------------------------------------------------------------- + val createNamespaces: ScenarioBuilder = scenario("Create namespaces using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdNamespaces.getAndIncrement() < numNamespaces && session.contains("accessToken") + )( + feed(namespaceActions.namespaceCreationFeeder()) + .exec(namespaceActions.createNamespace) + ) + + // -------------------------------------------------------------------------------- + // Workload: Create tables + // -------------------------------------------------------------------------------- + val createTables: ScenarioBuilder = scenario("Create tables using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdTables.getAndIncrement() < dp.numTables && session.contains("accessToken") + )( + feed(tableActions.tableCreationFeeder()) + .exec(tableActions.createTable) + ) + + // 
-------------------------------------------------------------------------------- + // Workload: Create views + // -------------------------------------------------------------------------------- + val createViews: ScenarioBuilder = scenario("Create views using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdViews.getAndIncrement() < dp.numViews && session.contains("accessToken") + )( + feed(viewActions.viewCreationFeeder()) + .exec(viewActions.createView) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala new file mode 100644 index 0000000..6381bd7 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.slf4j.LoggerFactory + +import scala.concurrent.duration._ + +/** + * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended + * to be used against an empty Polaris instance. It is a concurrent version of CreateTreeDataset, + * i.e. up to 50 requests are sent simultaneously. 
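+ *
+ * Namespace creation is the exception: its users ramp up gradually, because a child namespace
+ * can only be created once its parent namespace exists.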
+ */ +class CreateTreeDatasetConcurrent extends CreateTreeDataset { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(createCatalogs.inject(atOnceUsers(50))) + .andThen( + createNamespaces.inject( + constantUsersPerSec(1).during(1.seconds), + constantUsersPerSec(dp.nsWidth - 1).during(dp.nsDepth.seconds) + ) + ) + .andThen(createTables.inject(atOnceUsers(20))) + .andThen(createViews.inject(atOnceUsers(20))) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala new file mode 100644 index 0000000..cc46c51 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.slf4j.LoggerFactory + +/** + * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended + * to be used against an empty Polaris instance. It is a sequential version of CreateTreeDataset, + * i.e. only one request is sent at a time. 
+ */ +class CreateTreeDatasetSequential extends CreateTreeDataset { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(createCatalogs.inject(atOnceUsers(1))) + .andThen(createNamespaces.inject(atOnceUsers(1))) + .andThen(createTables.inject(atOnceUsers(1))) + .andThen(createViews.inject(atOnceUsers(1))) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala new file mode 100644 index 0000000..b35c409 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.actions._ +import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config +import org.apache.polaris.benchmarks.parameters.WorkloadParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} + +/** + * This simulation is a 100% read workload that fetches a tree dataset in Polaris. It is intended to + * be used against a Polaris instance with a pre-existing tree dataset. It has no side effect on the + * dataset and therefore can be executed multiple times without any issue. 
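+ *
+ * Each entity is verified in three ways: a list call, an existence check (HEAD), and a full
+ * fetch whose payload is compared against the expected dataset values.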
+ */ +class ReadTreeDataset extends Simulation { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Load parameters + // -------------------------------------------------------------------------------- + private val cp = config.connectionParameters + private val dp = config.datasetParameters + val wp: WorkloadParameters = config.workloadParameters + + // -------------------------------------------------------------------------------- + // Helper values + // -------------------------------------------------------------------------------- + private val numNamespaces: Int = dp.nAryTree.numberOfNodes + private val accessToken: AtomicReference[String] = new AtomicReference() + + private val authenticationActions = AuthenticationActions(cp, accessToken) + private val catalogActions = CatalogActions(dp, accessToken) + private val namespaceActions = NamespaceActions(dp, wp, accessToken) + private val tableActions = TableActions(dp, wp, accessToken) + private val viewActions = ViewActions(dp, wp, accessToken) + + private val verifiedCatalogs = new AtomicInteger() + private val verifiedNamespaces = new AtomicInteger() + private val verifiedTables = new AtomicInteger() + private val verifiedViews = new AtomicInteger() + + // -------------------------------------------------------------------------------- + // Workload: Authenticate and store the access token for later use + // -------------------------------------------------------------------------------- + private val authenticate = scenario("Authenticate using the OAuth2 REST API endpoint") + .feed(authenticationActions.feeder()) + .tryMax(5) { + exec(authenticationActions.authenticateAndSaveAccessToken) + } + + // -------------------------------------------------------------------------------- + // Workload: Verify each catalog + // -------------------------------------------------------------------------------- + private val verifyCatalogs = scenario("Verify catalogs using the Polaris Management REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedCatalogs.getAndIncrement() < dp.numCatalogs && session.contains("accessToken") + )( + feed(catalogActions.feeder()) + .exec(catalogActions.fetchCatalog) + ) + + // -------------------------------------------------------------------------------- + // Workload: Verify namespaces + // -------------------------------------------------------------------------------- + private val verifyNamespaces = scenario("Verify namespaces using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedNamespaces.getAndIncrement() < numNamespaces && session.contains("accessToken") + )( + feed(namespaceActions.namespaceFetchFeeder()) + .exec(namespaceActions.fetchAllChildrenNamespaces) + .exec(namespaceActions.checkNamespaceExists) + .exec(namespaceActions.fetchNamespace) + ) + + // -------------------------------------------------------------------------------- + // Workload: Verify tables + // -------------------------------------------------------------------------------- + private val verifyTables = scenario("Verify tables using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedTables.getAndIncrement() < dp.numTables && session.contains("accessToken") + )( + feed(tableActions.tableFetchFeeder()) + .exec(tableActions.fetchAllTables) + 
.exec(tableActions.checkTableExists) + .exec(tableActions.fetchTable) + ) + + // -------------------------------------------------------------------------------- + // Workload: Verify views + // -------------------------------------------------------------------------------- + private val verifyViews = scenario("Verify views using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedViews.getAndIncrement() < dp.numViews && session.contains("accessToken") + )( + feed(viewActions.viewFetchFeeder()) + .exec(viewActions.fetchAllViews) + .exec(viewActions.checkViewExists) + .exec(viewActions.fetchView) + ) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(verifyCatalogs.inject(atOnceUsers(1))) + .andThen(verifyNamespaces.inject(atOnceUsers(dp.nsDepth))) + .andThen(verifyTables.inject(atOnceUsers(50))) + .andThen(verifyViews.inject(atOnceUsers(50))) + ) + .protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala new file mode 100644 index 0000000..c1dbe37 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.polaris.benchmarks.simulations
+
+import io.gatling.core.Predef._
+import io.gatling.core.structure.ScenarioBuilder
+import org.apache.polaris.benchmarks.actions._
+import org.apache.polaris.benchmarks.parameters.BenchmarkConfig.config
+import org.apache.polaris.benchmarks.parameters.{
+  ConnectionParameters,
+  DatasetParameters,
+  WorkloadParameters
+}
+import org.apache.polaris.benchmarks.util.CircularIterator
+import org.slf4j.LoggerFactory
+
+import java.util.concurrent.atomic.AtomicReference
+
+/**
+ * This simulation is a mixed read/write workload that exercises an existing tree dataset in
+ * Polaris. The proportion of reads to writes is controlled by the workload parameters. It is
+ * intended to be used against a Polaris instance with a pre-existing tree dataset.
+ */
+class ReadUpdateTreeDataset extends Simulation {
+  private val logger = LoggerFactory.getLogger(getClass)
+
+  // --------------------------------------------------------------------------------
+  // Load parameters
+  // --------------------------------------------------------------------------------
+  val cp: ConnectionParameters = config.connectionParameters
+  val dp: DatasetParameters = config.datasetParameters
+  val wp: WorkloadParameters = config.workloadParameters
+
+  // --------------------------------------------------------------------------------
+  // Helper values
+  // --------------------------------------------------------------------------------
+  private val numNamespaces: Int = dp.nAryTree.numberOfNodes
+  private val accessToken: AtomicReference[String] = new AtomicReference()
+
+  private val authActions = AuthenticationActions(cp, accessToken)
+  private val catActions = CatalogActions(dp, accessToken)
+  private val nsActions = NamespaceActions(dp, wp, accessToken)
+  private val tblActions = TableActions(dp, wp, accessToken)
+  private val viewActions = ViewActions(dp, wp, accessToken)
+
+  // --------------------------------------------------------------------------------
+  // Workload: Authenticate and store the access token for later use
+  // --------------------------------------------------------------------------------
+  val authenticate: ScenarioBuilder = scenario("Authenticate using the OAuth2 REST API endpoint")
+    .feed(authActions.feeder())
+    .tryMax(5) {
+      exec(authActions.authenticateAndSaveAccessToken)
+    }
+
+  private val nsListFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder)
+  private val nsExistsFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder)
+  private val nsFetchFeeder = new CircularIterator(nsActions.namespaceFetchFeeder)
+  private val nsUpdateFeeder = new CircularIterator(nsActions.namespacePropertiesUpdateFeeder)
+
+  private val tblListFeeder = new CircularIterator(tblActions.tableIdentityFeeder)
+  private val tblExistsFeeder = new CircularIterator(tblActions.tableIdentityFeeder)
+  private val tblFetchFeeder = new CircularIterator(tblActions.tableFetchFeeder)
+  private val tblUpdateFeeder = new CircularIterator(tblActions.propertyUpdateFeeder)
+
+  private val viewListFeeder = new CircularIterator(viewActions.viewIdentityFeeder)
+  private val viewExistsFeeder = new CircularIterator(viewActions.viewIdentityFeeder)
+  private val viewFetchFeeder = new CircularIterator(viewActions.viewFetchFeeder)
+  private val viewUpdateFeeder = new CircularIterator(viewActions.propertyUpdateFeeder)
+
+  // --------------------------------------------------------------------------------
+  // Workload: Randomly read and write entities
+  // --------------------------------------------------------------------------------
+  val readWriteScenario: ScenarioBuilder =
+    scenario("Read and write entities using the Iceberg REST API")
+      .exec(authActions.restoreAccessTokenInSession)
+      .randomSwitch(
+        wp.gatlingReadRatio ->
group("Read")( + uniformRandomSwitch( + exec(feed(nsListFeeder).exec(nsActions.fetchAllChildrenNamespaces)), + exec(feed(nsExistsFeeder).exec(nsActions.checkNamespaceExists)), + exec(feed(nsFetchFeeder).exec(nsActions.fetchNamespace)), + exec(feed(tblListFeeder).exec(tblActions.fetchAllTables)), + exec(feed(tblExistsFeeder).exec(tblActions.checkTableExists)), + exec(feed(tblFetchFeeder).exec(tblActions.fetchTable)), + exec(feed(viewListFeeder).exec(viewActions.fetchAllViews)), + exec(feed(viewExistsFeeder).exec(viewActions.checkViewExists)), + exec(feed(viewFetchFeeder).exec(viewActions.fetchView)) + ) + ), + wp.gatlingWriteRatio -> group("Write")( + uniformRandomSwitch( + exec(feed(nsUpdateFeeder).exec(nsActions.updateNamespaceProperties)), + exec(feed(tblUpdateFeeder).exec(tblActions.updateTable)), + exec(feed(viewUpdateFeeder).exec(viewActions.updateView)) + ) + ) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala new file mode 100644 index 0000000..35e219a --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ + +import scala.concurrent.duration.DurationInt + +class ReadUpdateTreeDatasetConcurrent extends ReadUpdateTreeDataset { + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(readWriteScenario.inject(constantUsersPerSec(100).during(5.minutes).randomized)) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala new file mode 100644 index 0000000..4c9e80e --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ + +import scala.concurrent.duration.DurationInt + +class ReadUpdateTreeDatasetSequential extends ReadUpdateTreeDataset { + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(readWriteScenario.inject(constantUsersPerSec(1).during(5.minutes))) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/util/CircularIterator.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/util/CircularIterator.scala new file mode 100644 index 0000000..0694f50 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/util/CircularIterator.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.polaris.benchmarks.util
+
+import scala.util.Random
+
+/**
+ * An infinite iterator that cycles over the elements produced by `builder`. When the underlying
+ * iterator is exhausted, a fresh one is built. `next()` is synchronized so that a single instance
+ * can be shared by concurrent virtual users.
+ */
+class CircularIterator[T](builder: () => Iterator[T]) extends Iterator[T] {
+  private var currentIterator: Iterator[T] = builder()
+
+  override def hasNext: Boolean = true
+
+  override def next(): T = synchronized {
+    if (!currentIterator.hasNext) {
+      currentIterator = builder()
+    }
+    currentIterator.next()
+  }
+}
+
+/**
+ * An infinite iterator that draws `bufferSize` elements from the underlying circular iterator and
+ * emits them in a seeded random order, refilling and reshuffling the buffer when it is exhausted.
+ * The fixed seed makes the ordering reproducible across benchmark runs.
+ */
+class BufferedRandomIterator[T](underlying: CircularIterator[T], bufferSize: Int, seed: Long)
+    extends Iterator[T] {
+  private val random = new Random(seed)
+  private var buffer: Iterator[T] = populateAndShuffle()
+
+  private def populateAndShuffle(): Iterator[T] =
+    random.shuffle((1 to bufferSize).map(_ => underlying.next()).toList).iterator
+
+  override def hasNext: Boolean = true
+
+  override def next(): T = synchronized {
+    if (!buffer.hasNext) {
+      buffer = populateAndShuffle()
+    }
+    buffer.next()
+  }
+}
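
A quick usage sketch of the two iterators above, with hypothetical values:

    val base = new CircularIterator(() => Iterator(1, 2, 3))
    val shuffled = new BufferedRandomIterator(base, bufferSize = 6, seed = 42L)
    // base cycles 1, 2, 3, 1, 2, 3, ...; shuffled draws six values into a buffer,
    // emits them in seeded random order, then refills and reshuffles. The fixed
    // seed keeps the ordering reproducible across runs.
    val firstSix = shuffled.take(6).toList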