This is an automated email from the ASF dual-hosted git repository.
asorokoumov pushed a commit to branch 0.7
in repository https://gitbox.apache.org/repos/asf/otava.git
The following commit(s) were added to refs/heads/0.7 by this push:
new 3021151 Address feedback from 0.7.0-rc3 (#104)
3021151 is described below
commit 30211513b326328a7272b8820292d73458a2310c
Author: Alex Sorokoumov <[email protected]>
AuthorDate: Fri Nov 28 11:10:31 2025 -0800
Address feedback from 0.7.0-rc3 (#104)
* Revert "Fix multiple issues discovered via running examples (#95)"
This reverts commit 5276f24de6f0853f8304374dd1b3772501863c3c.
* Revert "Fix config parsing (#91)"
This reverts commit 484aaef8493a076b42d1b0e92679d9edb87fb043.
* Revert "OTAVA-82: use ConfigArgParse to create Config (#86)"
This reverts commit 69d2b97a6c38873b74755bf1104a69f099b922b9.
* Fix docker run commands in examples
* Bump year in the postgres example data
By default, Otava looks 1 year back. Example data is too old.
* Update CLI help description
* Add e2e tests covering CSV and PostgreSQL usage
* OTAVA-65: Fix --help requiring a config file
* Run tests against branch 0.7
* Fixup csv docs
---
.github/workflows/python-app.yml | 4 +-
docs/CSV.md | 2 +-
otava/bigquery.py | 16 -
otava/config.py | 219 +++++------
otava/grafana.py | 16 -
otava/graphite.py | 12 -
otava/main.py | 38 +-
otava/postgres.py | 20 -
.../resources/otava.yaml | 43 ++-
otava/slack.py | 16 -
pyproject.toml | 3 +-
tests/config_test.py | 377 +------------------
tests/csv_e2e_test.py | 230 ++++++++++++
tests/postgres_e2e_test.py | 415 +++++++++++++++++++++
uv.lock | 25 +-
15 files changed, 807 insertions(+), 629 deletions(-)
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index c3a94a9..77d0b10 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -22,9 +22,9 @@ name: Python application
on:
push:
- branches: [master]
+ branches: [master, "0.7"]
pull_request:
- branches: [master]
+ branches: [master, "0.7"]
jobs:
build:
diff --git a/docs/CSV.md b/docs/CSV.md
index 8eb65b7..7aad9b2 100644
--- a/docs/CSV.md
+++ b/docs/CSV.md
@@ -20,7 +20,7 @@
# Importing results from CSV
> [!TIP]
-> See [otava.yaml](../examples/csv/otava.yaml) for the full example
configuration.
+> See [otava.yaml](../examples/csv/config/otava.yaml) for the full example
configuration.
## Tests
diff --git a/otava/bigquery.py b/otava/bigquery.py
index cf5fafd..7ef7c92 100644
--- a/otava/bigquery.py
+++ b/otava/bigquery.py
@@ -28,26 +28,10 @@ from otava.test_config import BigQueryTestConfig
@dataclass
class BigQueryConfig:
- NAME = "bigquery"
-
project_id: str
dataset: str
credentials: str
- @staticmethod
- def add_parser_args(arg_group):
- arg_group.add_argument("--bigquery-project-id", help="BigQuery project
ID", env_var="BIGQUERY_PROJECT_ID")
- arg_group.add_argument("--bigquery-dataset", help="BigQuery dataset",
env_var="BIGQUERY_DATASET")
- arg_group.add_argument("--bigquery-credentials", help="BigQuery
credentials file", env_var="BIGQUERY_VAULT_SECRET")
-
- @staticmethod
- def from_parser_args(args):
- return BigQueryConfig(
- project_id=getattr(args, 'bigquery_project_id', None),
- dataset=getattr(args, 'bigquery_dataset', None),
- credentials=getattr(args, 'bigquery_credentials', None)
- )
-
@dataclass
class BigQueryError(Exception):
diff --git a/otava/config.py b/otava/config.py
index ca1d587..37936c6 100644
--- a/otava/config.py
+++ b/otava/config.py
@@ -14,12 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import logging
+
+import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional
-import configargparse
from expandvars import expandvars
from ruamel.yaml import YAML
@@ -61,7 +61,7 @@ def load_tests(config: Dict, templates: Dict) -> Dict[str,
TestConfig]:
raise ConfigError("Property `tests` is not a dictionary")
result = {}
- for test_name, test_config in tests.items():
+ for (test_name, test_config) in tests.items():
template_names = test_config.get("inherit", [])
if not isinstance(template_names, List):
template_names = [templates]
@@ -81,7 +81,7 @@ def load_test_groups(config: Dict, tests: Dict[str,
TestConfig]) -> Dict[str, Li
raise ConfigError("Property `test_groups` is not a dictionary")
result = {}
- for group_name, test_names in groups.items():
+ for (group_name, test_names) in groups.items():
test_list = []
if not isinstance(test_names, List):
raise ConfigError(f"Test group {group_name} must be a list")
@@ -96,124 +96,107 @@ def load_test_groups(config: Dict, tests: Dict[str,
TestConfig]) -> Dict[str, Li
return result
-def expand_env_vars_recursive(obj):
- """Recursively expand environment variables in all string values within a
nested structure.
-
- Raises ConfigError if any environment variables remain undefined after
expansion.
- """
- if isinstance(obj, dict):
- return {key: expand_env_vars_recursive(value) for key, value in
obj.items()}
- elif isinstance(obj, list):
- return [expand_env_vars_recursive(item) for item in obj]
- elif isinstance(obj, str):
- return expandvars(obj, nounset=True)
- else:
- return obj
-
-
-def load_config_from_parser_args(args: configargparse.Namespace) -> Config:
- config_file = getattr(args, "config_file", None)
- if config_file is not None:
+def load_config_from(config_file: Path) -> Config:
+ """Loads config from the specified location"""
+ try:
+ content = expandvars(config_file.read_text(), nounset=True)
yaml = YAML(typ="safe")
- config = yaml.load(Path(config_file).read_text())
-
- # Expand environment variables in the entire config after CLI argument
replacement
- try:
- config = expand_env_vars_recursive(config)
- except Exception as e:
- raise ConfigError(f"Error expanding environment variables: {e}")
+ config = yaml.load(content)
+ """
+ if Grafana configs not explicitly set in yaml file, default to same as
Graphite
+ server at port 3000
+ """
+ graphite_config = None
+ grafana_config = None
+ if "graphite" in config:
+ if "url" not in config["graphite"]:
+ raise ValueError("graphite.url")
+ graphite_config = GraphiteConfig(url=config["graphite"]["url"])
+ if config.get("grafana") is None:
+ config["grafana"] = {}
+ config["grafana"]["url"] =
f"{config['graphite']['url'].strip('/')}:3000/"
+ config["grafana"]["user"] = os.environ.get("GRAFANA_USER",
"admin")
+ config["grafana"]["password"] =
os.environ.get("GRAFANA_PASSWORD", "admin")
+ grafana_config = GrafanaConfig(
+ url=config["grafana"]["url"],
+ user=config["grafana"]["user"],
+ password=config["grafana"]["password"],
+ )
+
+ slack_config = None
+ if config.get("slack") is not None:
+ if not config["slack"]["token"]:
+ raise ValueError("slack.token")
+ slack_config = SlackConfig(
+ bot_token=config["slack"]["token"],
+ )
+
+ postgres_config = None
+ if config.get("postgres") is not None:
+ if not config["postgres"]["hostname"]:
+ raise ValueError("postgres.hostname")
+ if not config["postgres"]["port"]:
+ raise ValueError("postgres.port")
+ if not config["postgres"]["username"]:
+ raise ValueError("postgres.username")
+ if not config["postgres"]["password"]:
+ raise ValueError("postgres.password")
+ if not config["postgres"]["database"]:
+ raise ValueError("postgres.database")
+
+ postgres_config = PostgresConfig(
+ hostname=config["postgres"]["hostname"],
+ port=config["postgres"]["port"],
+ username=config["postgres"]["username"],
+ password=config["postgres"]["password"],
+ database=config["postgres"]["database"],
+ )
+
+ bigquery_config = None
+ if config.get("bigquery") is not None:
+ bigquery_config = BigQueryConfig(
+ project_id=config["bigquery"]["project_id"],
+ dataset=config["bigquery"]["dataset"],
+ credentials=config["bigquery"]["credentials"],
+ )
templates = load_templates(config)
tests = load_tests(config, templates)
groups = load_test_groups(config, tests)
- else:
- logging.warning("Otava configuration file not found or not specified")
- tests = {}
- groups = {}
-
- return Config(
- graphite=GraphiteConfig.from_parser_args(args),
- grafana=GrafanaConfig.from_parser_args(args),
- slack=SlackConfig.from_parser_args(args),
- postgres=PostgresConfig.from_parser_args(args),
- bigquery=BigQueryConfig.from_parser_args(args),
- tests=tests,
- test_groups=groups,
- )
-
-
-class NestedYAMLConfigFileParser(configargparse.ConfigFileParser):
- """
- Custom YAML config file parser that supports nested YAML structures.
- Maps nested keys like 'slack: {token: value}' to 'slack-token=value', i.e.
CLI argument style.
- Recasts values from YAML inferred types to strings as expected for CLI
arguments.
- """
-
- CLI_CONFIG_SECTIONS = [
- GraphiteConfig.NAME,
- GrafanaConfig.NAME,
- SlackConfig.NAME,
- PostgresConfig.NAME,
- BigQueryConfig.NAME,
+
+ return Config(
+ graphite=graphite_config,
+ grafana=grafana_config,
+ slack=slack_config,
+ postgres=postgres_config,
+ bigquery=bigquery_config,
+ tests=tests,
+ test_groups=groups,
+ )
+
+ except FileNotFoundError as e:
+ raise ConfigError(f"Configuration file not found: {e.filename}")
+ except KeyError as e:
+ raise ConfigError(f"Configuration key not found: {e.args[0]}")
+ except ValueError as e:
+ raise ConfigError(f"Value for configuration key not found:
{e.args[0]}")
+
+
+def load_config() -> Config:
+ """Loads config from one of the default locations"""
+
+ env_config_path = os.environ.get("OTAVA_CONFIG")
+ if env_config_path:
+ return load_config_from(Path(env_config_path).absolute())
+
+ paths = [
+ Path().home() / ".otava/otava.yaml",
+ Path().home() / ".otava/conf.yaml",
+ Path(os.path.realpath(__file__)).parent / "resources/otava.yaml",
]
- def parse(self, stream):
- yaml = YAML(typ="safe")
- config_data = yaml.load(stream)
- if config_data is None:
- return {}
-
- flattened_dict = {}
- for key, value in config_data.items():
- if key in self.CLI_CONFIG_SECTIONS:
- # Flatten only the config sections that correspond to CLI
arguments
- self._flatten_dict(value, flattened_dict, f"{key}-")
- # Ignore other sections like 'templates' and 'tests' - they
shouldn't become CLI arguments
- return flattened_dict
-
- def _flatten_dict(self, nested_dict, flattened_dict, prefix=''):
- """Recursively flatten nested dictionaries using CLI dash-separated
notation for keys."""
- if not isinstance(nested_dict, dict):
- return
-
- for key, value in nested_dict.items():
- new_key = f"{prefix}{key}" if prefix else key
-
- # yaml keys typically use snake case
- # replace underscore with dash to convert snake case to CLI
dash-separated style
- new_key = new_key.replace("_", "-")
-
- if isinstance(value, dict):
- # Recursively process nested dictionaries
- self._flatten_dict(value, flattened_dict, f"{new_key}-")
- else:
- # Add leaf values to the flattened dictionary
- # Value must be cast to string here, so arg parser can cast
from string to expected type later
- flattened_dict[new_key] = str(value)
-
-
-def create_config_parser() -> configargparse.ArgumentParser:
- parser = configargparse.ArgumentParser(
- add_help=False,
- config_file_parser_class=NestedYAMLConfigFileParser,
- default_config_files=[
- Path().home() / ".otava/conf.yaml",
- Path().home() / ".otava/otava.yaml",
- ],
- allow_abbrev=False, # required for correct parsing of nested values
from config file
- )
- parser.add_argument('--config-file', is_config_file=True, help='Otava
config file path', env_var="OTAVA_CONFIG")
- GraphiteConfig.add_parser_args(parser.add_argument_group('Graphite
Options', 'Options for Graphite configuration'))
- GrafanaConfig.add_parser_args(parser.add_argument_group('Grafana Options',
'Options for Grafana configuration'))
- SlackConfig.add_parser_args(parser.add_argument_group('Slack Options',
'Options for Slack configuration'))
- PostgresConfig.add_parser_args(parser.add_argument_group('Postgres
Options', 'Options for Postgres configuration'))
- BigQueryConfig.add_parser_args(parser.add_argument_group('BigQuery
Options', 'Options for BigQuery configuration'))
- return parser
-
-
-def load_config_from_file(config_file: str, arg_overrides: Optional[List[str]]
= None) -> Config:
- if arg_overrides is None:
- arg_overrides = []
- arg_overrides.extend(["--config-file", config_file])
- args, _ = create_config_parser().parse_known_args(args=arg_overrides)
- return load_config_from_parser_args(args)
+ for p in paths:
+ if p.exists():
+ return load_config_from(p)
+
+ raise ConfigError(f"No configuration file found. Checked $OTAVA_CONFIG and
searched: {paths}")
diff --git a/otava/grafana.py b/otava/grafana.py
index b853f3a..dbf9e42 100644
--- a/otava/grafana.py
+++ b/otava/grafana.py
@@ -26,26 +26,10 @@ from requests.exceptions import HTTPError
@dataclass
class GrafanaConfig:
- NAME = "grafana"
-
url: str
user: str
password: str
- @staticmethod
- def add_parser_args(arg_group):
- arg_group.add_argument("--grafana-url", help="Grafana server URL",
env_var="GRAFANA_ADDRESS")
- arg_group.add_argument("--grafana-user", help="Grafana server user",
env_var="GRAFANA_USER", default="admin")
- arg_group.add_argument("--grafana-password", help="Grafana server
password", env_var="GRAFANA_PASSWORD", default="admin")
-
- @staticmethod
- def from_parser_args(args):
- return GrafanaConfig(
- url=getattr(args, 'grafana_url', None),
- user=getattr(args, 'grafana_user', None),
- password=getattr(args, 'grafana_password', None)
- )
-
@dataclass
class GrafanaError(Exception):
diff --git a/otava/graphite.py b/otava/graphite.py
index 69a7592..6536079 100644
--- a/otava/graphite.py
+++ b/otava/graphite.py
@@ -29,20 +29,8 @@ from otava.util import parse_datetime
@dataclass
class GraphiteConfig:
- NAME = "graphite"
-
url: str
- @staticmethod
- def add_parser_args(arg_group):
- arg_group.add_argument("--graphite-url", help="Graphite server URL",
env_var="GRAPHITE_ADDRESS")
-
- @staticmethod
- def from_parser_args(args):
- return GraphiteConfig(
- url=getattr(args, 'graphite_url', None)
- )
-
@dataclass
class DataPoint:
diff --git a/otava/main.py b/otava/main.py
index 1c2e1e4..f5fc9f1 100644
--- a/otava/main.py
+++ b/otava/main.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import argparse
import copy
import logging
import sys
@@ -22,14 +23,13 @@ from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Optional
-import configargparse as argparse
import pytz
from slack_sdk import WebClient
from otava import config
from otava.attributes import get_back_links
from otava.bigquery import BigQuery, BigQueryError
-from otava.config import Config
+from otava.config import Config, ConfigError
from otava.data_selector import DataSelector
from otava.grafana import Annotation, Grafana, GrafanaError
from otava.graphite import GraphiteError
@@ -514,13 +514,10 @@ def analysis_options_from_args(args: argparse.Namespace)
-> AnalysisOptions:
return conf
-def create_otava_cli_parser() -> argparse.ArgumentParser:
- parser = argparse.ArgumentParser(
- description="Hunts performance regressions in Fallout results",
- parents=[config.create_config_parser()],
- config_file_parser_class=config.NestedYAMLConfigFileParser,
- allow_abbrev=False, # required for correct parsing of nested values
from config file
- )
+def main():
+ logging.basicConfig(format="%(levelname)s: %(message)s",
level=logging.INFO)
+
+ parser = argparse.ArgumentParser(description="Change Detection for
Continuous Performance Engineering")
subparsers = parser.add_subparsers(dest="command")
list_tests_parser = subparsers.add_parser("list-tests", help="list
available tests")
@@ -595,17 +592,22 @@ def create_otava_cli_parser() -> argparse.ArgumentParser:
"validate", help="validates the tests and metrics defined in the
configuration"
)
- return parser
+ # Parse arguments first, before loading config
+ args = parser.parse_args()
+ # If no command provided, just print usage and exit (no config needed)
+ if args.command is None:
+ parser.print_usage()
+ return
-def script_main(conf: Config = None, args: List[str] = None):
- logging.basicConfig(format="%(levelname)s: %(message)s",
level=logging.INFO)
- parser = create_otava_cli_parser()
+ # Now load the config only when we actually need it
+ try:
+ conf = config.load_config()
+ except ConfigError as err:
+ logging.error(err.message)
+ exit(1)
try:
- args, _ = parser.parse_known_args(args=args)
- if conf is None:
- conf = config.load_config_from_parser_args(args)
otava = Otava(conf)
if args.command == "list-groups":
@@ -730,9 +732,5 @@ def script_main(conf: Config = None, args: List[str] =
None):
exit(1)
-def main():
- script_main()
-
-
if __name__ == "__main__":
main()
diff --git a/otava/postgres.py b/otava/postgres.py
index 7a2aaa0..6b98c0f 100644
--- a/otava/postgres.py
+++ b/otava/postgres.py
@@ -27,32 +27,12 @@ from otava.test_config import PostgresTestConfig
@dataclass
class PostgresConfig:
- NAME = "postgres"
-
hostname: str
port: int
username: str
password: str
database: str
- @staticmethod
- def add_parser_args(arg_group):
- arg_group.add_argument("--postgres-hostname", help="PostgreSQL server
hostname", env_var="POSTGRES_HOSTNAME")
- arg_group.add_argument("--postgres-port", type=int, help="PostgreSQL
server port", env_var="POSTGRES_PORT")
- arg_group.add_argument("--postgres-username", help="PostgreSQL
username", env_var="POSTGRES_USERNAME")
- arg_group.add_argument("--postgres-password", help="PostgreSQL
password", env_var="POSTGRES_PASSWORD")
- arg_group.add_argument("--postgres-database", help="PostgreSQL
database name", env_var="POSTGRES_DATABASE")
-
- @staticmethod
- def from_parser_args(args):
- return PostgresConfig(
- hostname=getattr(args, 'postgres_hostname', None),
- port=getattr(args, 'postgres_port', None),
- username=getattr(args, 'postgres_username', None),
- password=getattr(args, 'postgres_password', None),
- database=getattr(args, 'postgres_database', None)
- )
-
@dataclass
class PostgresError(Exception):
diff --git a/tests/resources/substitution_test_config.yaml
b/otava/resources/otava.yaml
similarity index 60%
rename from tests/resources/substitution_test_config.yaml
rename to otava/resources/otava.yaml
index 6b6b024..44479f3 100644
--- a/tests/resources/substitution_test_config.yaml
+++ b/otava/resources/otava.yaml
@@ -15,25 +15,34 @@
# specific language governing permissions and limitations
# under the License.
-slack:
- token: config_slack_token
-
+# External systems connectors configuration:
graphite:
- url: config_graphite_url
+ url: ${GRAPHITE_ADDRESS}
grafana:
- url: config_grafana_url
- user: config_grafana_user
- password: config_grafana_password
+ url: ${GRAFANA_ADDRESS}
+ user: ${GRAFANA_USER}
+ password: ${GRAFANA_PASSWORD}
+
+slack:
+ token: ${SLACK_BOT_TOKEN}
+
+# Templates define common bits shared between test definitions:
+templates:
+
+# Define your tests here:
+tests:
+ local.sample:
+ type: csv
+ file: tests/resources/sample.csv
+ time_column: time
+ metrics: [metric1, metric2]
+ attributes: [commit]
+ csv_options:
+ delimiter: ','
+ quote_char: "'"
-bigquery:
- project_id: config_bigquery_project_id
- dataset: config_bigquery_dataset
- credentials: config_bigquery_credentials
-postgres:
- hostname: config_postgres_hostname
- port: 1111
- username: config_postgres_username
- password: config_postgres_password
- database: config_postgres_database
+test_groups:
+ local:
+ - local.sample
diff --git a/otava/slack.py b/otava/slack.py
index a0d4bc3..1cad43f 100644
--- a/otava/slack.py
+++ b/otava/slack.py
@@ -34,24 +34,8 @@ class NotificationError(Exception):
@dataclass
class SlackConfig:
- NAME = "slack"
-
bot_token: str
- @staticmethod
- def add_parser_args(parser):
- parser.add_argument(
- "--slack-token",
- help="Slack bot token to use for sending notifications",
- env_var="SLACK_BOT_TOKEN",
- )
-
- @staticmethod
- def from_parser_args(args):
- return SlackConfig(
- bot_token=getattr(args, "slack_token", None)
- )
-
class SlackNotification:
tests_with_insufficient_data: List[str]
diff --git a/pyproject.toml b/pyproject.toml
index 6bb1409..46edadd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ classifiers = [
]
dependencies = [
"dateparser>=1.0.0",
+ "expandvars>=0.6.5",
"numpy==1.24.*",
"python-dateutil>=2.8.1",
"signal-processing-algorithms==1.3.5",
@@ -47,8 +48,6 @@ dependencies = [
"slack-sdk>=3.4.2",
"google-cloud-bigquery>=3.25.0",
"pg8000>=1.31.2",
- "configargparse>=1.7.1",
- "expandvars>=0.12.0",
]
[project.optional-dependencies]
diff --git a/tests/config_test.py b/tests/config_test.py
index 4f66b95..a59e83c 100644
--- a/tests/config_test.py
+++ b/tests/config_test.py
@@ -14,25 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import os
-import tempfile
-from io import StringIO
-import pytest
-from expandvars import UnboundVariable
+from pathlib import Path
-from otava.config import (
- NestedYAMLConfigFileParser,
- create_config_parser,
- expand_env_vars_recursive,
- load_config_from_file,
- load_config_from_parser_args,
-)
+from otava.config import load_config_from
from otava.test_config import CsvTestConfig, GraphiteTestConfig,
HistoStatTestConfig
def test_load_graphite_tests():
- config = load_config_from_file("tests/resources/sample_config.yaml")
+ config = load_config_from(Path("tests/resources/sample_config.yaml"))
tests = config.tests
assert len(tests) == 4
test = tests["remote1"]
@@ -49,7 +39,7 @@ def test_load_graphite_tests():
def test_load_csv_tests():
- config = load_config_from_file("tests/resources/sample_config.yaml")
+ config = load_config_from(Path("tests/resources/sample_config.yaml"))
tests = config.tests
assert len(tests) == 4
test = tests["local1"]
@@ -70,372 +60,17 @@ def test_load_csv_tests():
def test_load_test_groups():
- config = load_config_from_file("tests/resources/sample_config.yaml")
+ config = load_config_from(Path("tests/resources/sample_config.yaml"))
groups = config.test_groups
assert len(groups) == 2
assert len(groups["remote"]) == 2
def test_load_histostat_config():
- config =
load_config_from_file("tests/resources/histostat_test_config.yaml")
+ config =
load_config_from(Path("tests/resources/histostat_test_config.yaml"))
tests = config.tests
assert len(tests) == 1
test = tests["histostat-sample"]
assert isinstance(test, HistoStatTestConfig)
# 14 tags * 12 tag_metrics == 168 unique metrics
assert len(test.fully_qualified_metric_names()) == 168
-
-
[email protected](
- "config_property",
- [
- # property, accessor, env_var, cli_flag, [config value, env value, cli
value]
- ("slack_token", lambda c: c.slack.bot_token, "SLACK_BOT_TOKEN",
"--slack-token"),
- ("bigquery_project_id", lambda c: c.bigquery.project_id,
"BIGQUERY_PROJECT_ID", "--bigquery-project-id"),
- ("bigquery_dataset", lambda c: c.bigquery.dataset, "BIGQUERY_DATASET",
"--bigquery-dataset"),
- ("bigquery_credentials", lambda c: c.bigquery.credentials,
"BIGQUERY_VAULT_SECRET", "--bigquery-credentials"),
- ("grafana_url", lambda c: c.grafana.url, "GRAFANA_ADDRESS",
"--grafana-url"),
- ("grafana_user", lambda c: c.grafana.user, "GRAFANA_USER",
"--grafana-user"),
- ("grafana_password", lambda c: c.grafana.password, "GRAFANA_PASSWORD",
"--grafana-password"),
- ("graphite_url", lambda c: c.graphite.url, "GRAPHITE_ADDRESS",
"--graphite-url"),
- ("postgres_hostname", lambda c: c.postgres.hostname,
"POSTGRES_HOSTNAME", "--postgres-hostname"),
- ("postgres_port", lambda c: c.postgres.port, "POSTGRES_PORT",
"--postgres-port", 1111, 2222, 3333),
- ("postgres_username", lambda c: c.postgres.username,
"POSTGRES_USERNAME", "--postgres-username"),
- ("postgres_password", lambda c: c.postgres.password,
"POSTGRES_PASSWORD", "--postgres-password"),
- ("postgres_database", lambda c: c.postgres.database,
"POSTGRES_DATABASE", "--postgres-database"),
- ],
- ids=lambda v: v[0], # use the property name for the parameterized test
name
-)
-def test_configuration_substitutions(config_property):
- config_file = "tests/resources/substitution_test_config.yaml"
- accessor = config_property[1]
-
- if len(config_property) == 4:
- config_value = f"config_{config_property[0]}"
- env_config_value = f"env_{config_property[0]}"
- cli_config_value = f"cli_{config_property[0]}"
- else:
- config_value = config_property[4]
- env_config_value = config_property[5]
- cli_config_value = config_property[6]
-
- # test value from config file
- config = load_config_from_file(config_file)
- assert accessor(config) == config_value
-
- # test env var overrides values from config file
- os.environ[config_property[2]] = str(env_config_value)
- try:
- config = load_config_from_file(config_file)
- assert accessor(config) == env_config_value
- finally:
- os.environ.pop(config_property[2])
-
- # test cli values override values from config file
- config = load_config_from_file(config_file,
arg_overrides=[config_property[3], str(cli_config_value)])
- assert accessor(config) == cli_config_value
-
- # test cli values override values from config file and env var
- os.environ[config_property[2]] = str(env_config_value)
- try:
- config = load_config_from_file(config_file,
arg_overrides=[config_property[3], str(cli_config_value)])
- assert accessor(config) == cli_config_value
- finally:
- os.environ.pop(config_property[2])
-
-
-def test_config_section_yaml_parser_flattens_only_config_sections():
- """Test that NestedYAMLConfigFileParser only flattens the specified config
sections."""
-
- parser = NestedYAMLConfigFileParser()
- test_yaml = """
-graphite:
- url: http://example.com
- timeout: 30
-slack:
- bot_token: test-token
- channel: "#alerts"
-postgres:
- hostname: localhost
- port: 5432
-templates:
- aggregate_mem:
- type: postgres
- time_column: commit_ts
- attributes: [experiment_id, config_id, commit]
- metrics:
- process_cumulative_rate_mean:
- direction: 1
- scale: 1
- process_cumulative_rate_stderr:
- direction: -1
- scale: 1
- process_cumulative_rate_diff:
- direction: -1
- scale: 1
- query: |
- SELECT e.commit,
- e.commit_ts,
- r.process_cumulative_rate_mean,
- r.process_cumulative_rate_stderr,
- r.process_cumulative_rate_diff,
- r.experiment_id,
- r.config_id
- FROM results r
- INNER JOIN configs c ON r.config_id = c.id
- INNER JOIN experiments e ON r.experiment_id = e.id
- WHERE e.exclude_from_analysis = false AND
- e.branch = 'trunk' AND
- e.username = 'ci' AND
- c.store = 'MEM' AND
- c.cache = true AND
- c.benchmark = 'aggregate' AND
- c.instance_type = 'ec2i3.large'
- ORDER BY e.commit_ts ASC;
-"""
-
- stream = StringIO(test_yaml)
- result = parser.parse(stream)
-
- # Should flatten config sections
- expected_keys = {
- 'graphite-url', 'graphite-timeout',
- 'slack-bot-token', 'slack-channel',
- 'postgres-hostname', 'postgres-port'
- }
-
- assert set(result.keys()) == expected_keys
- assert result['graphite-url'] == 'http://example.com'
- assert result['graphite-timeout'] == '30'
- assert result['slack-bot-token'] == 'test-token'
- assert result['slack-channel'] == '#alerts'
- assert result['postgres-hostname'] == 'localhost'
- assert result['postgres-port'] == '5432'
-
- # Should NOT contain any keys from ignored sections
- ignored_sections = {'templates', 'tests', 'test_groups'}
- for key in result.keys():
- section = key.split('-')[0]
- assert section not in ignored_sections, f"Found key '{key}' from
ignored section '{section}'"
-
-
-def test_expand_env_vars_recursive():
- """Test the expand_env_vars_recursive function with various data types."""
-
- # Set up test environment variables
- test_env_vars = {
- "TEST_HOST": "localhost",
- "TEST_PORT": "8080",
- "TEST_DB": "testdb",
- "TEST_USER": "testuser",
- }
-
- for key, value in test_env_vars.items():
- os.environ[key] = value
-
- try:
- # Test simple string expansion
- simple_string = "${TEST_HOST}:${TEST_PORT}"
- result = expand_env_vars_recursive(simple_string)
- assert result == "localhost:8080"
-
- # Test dictionary expansion
- test_dict = {
- "host": "${TEST_HOST}",
- "port": "${TEST_PORT}",
- "database": "${TEST_DB}",
- "connection_string":
"postgresql://${TEST_USER}@${TEST_HOST}:${TEST_PORT}/${TEST_DB}",
- "timeout": 30, # non-string should remain unchanged
- "enabled": True, # non-string should remain unchanged
- }
-
- result_dict = expand_env_vars_recursive(test_dict)
- expected_dict = {
- "host": "localhost",
- "port": "8080",
- "database": "testdb",
- "connection_string": "postgresql://testuser@localhost:8080/testdb",
- "timeout": 30,
- "enabled": True,
- }
- assert result_dict == expected_dict
-
- # Test list expansion
- test_list = [
- "${TEST_HOST}",
- {"nested_host": "${TEST_HOST}", "nested_port": "${TEST_PORT}"},
- ["${TEST_USER}", "${TEST_DB}"],
- 123, # non-string should remain unchanged
- ]
-
- result_list = expand_env_vars_recursive(test_list)
- expected_list = [
- "localhost",
- {"nested_host": "localhost", "nested_port": "8080"},
- ["testuser", "testdb"],
- 123,
- ]
- assert result_list == expected_list
-
- # Test undefined variables (should throw UnboundVariable)
- with pytest.raises(UnboundVariable, match="'UNDEFINED_VAR: unbound
variable"):
- expand_env_vars_recursive("${UNDEFINED_VAR}")
-
- # Test mixed defined/undefined variables (should throw UnboundVariable)
- with pytest.raises(UnboundVariable, match="'UNDEFINED_VAR: unbound
variable"):
-
expand_env_vars_recursive("prefix-${TEST_HOST}-middle-${UNDEFINED_VAR}-suffix")
-
- finally:
- # Clean up environment variables
- for key in test_env_vars:
- if key in os.environ:
- del os.environ[key]
-
-
-def test_env_var_expansion_in_templates_and_tests():
- """Test that environment variable expansion works in template and test
sections."""
-
- # Set up test environment variables
- test_env_vars = {
- "CSV_DELIMITER": "$",
- "CSV_QUOTE_CHAR": "!",
- "CSV_FILENAME": "/tmp/test.csv",
- }
-
- for key, value in test_env_vars.items():
- os.environ[key] = value
-
- # Create a temporary config file with env var placeholders
- config_content = """
-templates:
- csv_template_1:
- csv_options:
- delimiter: "${CSV_DELIMITER}"
-
- csv_template_2:
- csv_options:
- quote_char: '${CSV_QUOTE_CHAR}'
-
-tests:
- expansion_test:
- type: csv
- file: ${CSV_FILENAME}
- time_column: timestamp
- metrics:
- response_time:
- column: response_ms
- unit: ms
- inherit: [csv_template_1, csv_template_2]
-"""
-
- try:
- with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml",
delete=False) as f:
- f.write(config_content)
- config_file_path = f.name
-
- try:
- # Load config and verify expansion worked
- parser = create_config_parser()
- args = parser.parse_args(["--config-file", config_file_path])
- config = load_config_from_parser_args(args)
-
- # Verify test was loaded
- assert "expansion_test" in config.tests
- test = config.tests["expansion_test"]
- assert isinstance(test, CsvTestConfig)
-
- # Verify that expansion worked
- assert test.file == test_env_vars["CSV_FILENAME"]
-
- # Verify that inheritance from templates worked with expanded
values
- assert test.csv_options.delimiter == test_env_vars["CSV_DELIMITER"]
- assert test.csv_options.quote_char ==
test_env_vars["CSV_QUOTE_CHAR"]
-
- finally:
- os.unlink(config_file_path)
-
- finally:
- # Clean up environment variables
- for key in test_env_vars:
- if key in os.environ:
- del os.environ[key]
-
-
-def test_cli_precedence_over_env_vars():
- """Test that CLI arguments take precedence over environment variables."""
-
- # Set up environment variables
- env_vars = {
- "POSTGRES_HOSTNAME": "env-host.com",
- "POSTGRES_PORT": "5433",
- "POSTGRES_DATABASE": "env_db",
- "SLACK_BOT_TOKEN": "env-slack-token",
- }
-
- for key, value in env_vars.items():
- os.environ[key] = value
-
- # Create a simple config file
- config_content = """
-postgres:
- hostname: config_host
- port: 5432
- database: config_db
- username: config_user
- password: config_pass
-
-slack:
- token: config_slack_token
-"""
-
- try:
- with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml",
delete=False) as f:
- f.write(config_content)
- config_file_path = f.name
-
- try:
- # Test 1: Only environment variables (no CLI overrides)
- config_env_only = load_config_from_file(config_file_path)
-
- # Environment variables should override config file values
- assert config_env_only.postgres.hostname == "env-host.com"
- assert config_env_only.postgres.port == 5433
- assert config_env_only.postgres.database == "env_db"
- assert config_env_only.slack.bot_token == "env-slack-token"
-
- # Values without env vars should use config file values
- assert config_env_only.postgres.username == "config_user"
- assert config_env_only.postgres.password == "config_pass"
-
- # Test 2: CLI arguments should override environment variables
- cli_overrides = [
- "--postgres-hostname",
- "cli-host.com",
- "--postgres-port",
- "5434",
- "--slack-token",
- "cli-slack-token",
- ]
-
- config_cli_override = load_config_from_file(
- config_file_path, arg_overrides=cli_overrides
- )
-
- # CLI overrides should win
- assert config_cli_override.postgres.hostname == "cli-host.com"
- assert config_cli_override.postgres.port == 5434
- assert config_cli_override.slack.bot_token == "cli-slack-token"
-
- # Values without CLI override should still use env vars
- assert config_cli_override.postgres.database == "env_db"
- assert config_cli_override.postgres.username == "config_user"
- assert config_cli_override.postgres.password == "config_pass"
-
- finally:
- os.unlink(config_file_path)
-
- finally:
- # Clean up environment variables
- for key in env_vars:
- if key in os.environ:
- del os.environ[key]
diff --git a/tests/csv_e2e_test.py b/tests/csv_e2e_test.py
new file mode 100644
index 0000000..10f94bf
--- /dev/null
+++ b/tests/csv_e2e_test.py
@@ -0,0 +1,230 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import csv
+import os
+import subprocess
+import tempfile
+import textwrap
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+import pytest
+
+
+def test_analyze_csv():
+ """
+ End-to-end test for the CSV example from docs/CSV.md.
+
+ Writes a temporary CSV and otava.yaml, runs:
+ uv run otava analyze local.sample
+ in the temporary directory, and compares stdout to the expected output.
+ """
+
+ now = datetime.now()
+ n = 10
+ timestamps = [now - timedelta(days=i) for i in range(n)]
+ metrics1 = [154023, 138455, 143112, 149190, 132098, 151344, 155145,
148889, 149466, 148209]
+ metrics2 = [10.43, 10.23, 10.29, 10.91, 10.34, 10.69, 9.23, 9.11, 9.13,
9.03]
+ data_points = []
+ for i in range(n):
+ data_points.append(
+ (
+ timestamps[i].strftime("%Y.%m.%d %H:%M:%S %z"), # time
+ "aaa" + str(i), # commit
+ metrics1[i],
+ metrics2[i],
+ )
+ )
+
+ config_content = textwrap.dedent(
+ """\
+ tests:
+ local.sample:
+ type: csv
+ file: data/local_sample.csv
+ time_column: time
+ attributes: [commit]
+ metrics: [metric1, metric2]
+ csv_options:
+ delimiter: ","
+ quotechar: "'"
+ """
+ )
+ expected_output = textwrap.dedent(
+ """\
+ time commit metric1 metric2
+ ------------------------- -------- --------- ---------
+ {} aaa0 154023 10.43
+ {} aaa1 138455 10.23
+ {} aaa2 143112 10.29
+ {} aaa3 149190 10.91
+ {} aaa4 132098 10.34
+ {} aaa5 151344 10.69
+ ·········
+ -12.9%
+ ·········
+ {} aaa6 155145 9.23
+ {} aaa7 148889 9.11
+ {} aaa8 149466 9.13
+ {} aaa9 148209 9.03
+ """.format(
+ *[ts.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S +0000")
for ts in timestamps]
+ )
+ )
+ with tempfile.TemporaryDirectory() as td:
+ td_path = Path(td)
+ # create data directory and write CSV
+ data_dir = td_path / "data"
+ data_dir.mkdir(parents=True, exist_ok=True)
+ csv_path = data_dir / "local_sample.csv"
+ with open(csv_path, "w", newline="") as f:
+ writer = csv.writer(f)
+ writer.writerow(["time", "commit", "metric1", "metric2"])
+ writer.writerows(data_points)
+
+ # write otava.yaml in temp cwd
+ config_path = td_path / "otava.yaml"
+ config_path.write_text(config_content, encoding="utf-8")
+
+ # run command
+ cmd = ["uv", "run", "otava", "analyze", "local.sample"]
+ proc = subprocess.run(
+ cmd,
+ cwd=str(td_path),
+ capture_output=True,
+ text=True,
+ timeout=120,
+ env=dict(os.environ, OTAVA_CONFIG=config_path),
+ )
+
+ if proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {cmd!r}\n"
+ f"Exit code: {proc.returncode}\n\n"
+ f"Stdout:\n{proc.stdout}\n\n"
+ f"Stderr:\n{proc.stderr}\n"
+ )
+
+ # Python 3.9 and earlier does not print it for some reason...
+ output_without_log = proc.stdout.replace(
+ "Computing change points for test local.sample...", ""
+ )
+ # Python 3.9 complains about
importlib.metadata.packages_distributions...
+ output_without_log = output_without_log.replace(
+ "An error occurred: module 'importlib.metadata' has no attribute
'packages_distributions'",
+ "",
+ )
+ assert _remove_trailing_whitespaces(output_without_log) ==
expected_output.rstrip("\n")
+
+
+def test_regressions_csv():
+ """
+ End-to-end test for the CSV example from docs/CSV.md.
+
+ Writes a temporary CSV and otava.yaml, runs:
+        uv run otava regressions local.sample
+ in the temporary directory, and compares stdout to the expected output.
+ """
+
+ now = datetime.now()
+ n = 10
+ timestamps = [now - timedelta(days=i) for i in range(n)]
+ metrics1 = [154023, 138455, 143112, 149190, 132098, 151344, 155145,
148889, 149466, 148209]
+ metrics2 = [10.43, 10.23, 10.29, 10.91, 10.34, 10.69, 9.23, 9.11, 9.13,
9.03]
+ data_points = []
+ for i in range(n):
+ data_points.append(
+ (
+ timestamps[i].strftime("%Y.%m.%d %H:%M:%S %z"), # time
+ "aaa" + str(i), # commit
+ metrics1[i],
+ metrics2[i],
+ )
+ )
+
+ config_content = textwrap.dedent(
+ """\
+ tests:
+ local.sample:
+ type: csv
+ file: data/local_sample.csv
+ time_column: time
+ attributes: [commit]
+ metrics: [metric1, metric2]
+ csv_options:
+ delimiter: ","
+ quotechar: "'"
+ """
+ )
+ expected_output = textwrap.dedent(
+ """\
+ local.sample:
+ metric2 : 10.5 --> 9.12 ( -12.9%)
+ Regressions in 1 test found
+ """
+ )
+ with tempfile.TemporaryDirectory() as td:
+ td_path = Path(td)
+ # create data directory and write CSV
+ data_dir = td_path / "data"
+ data_dir.mkdir(parents=True, exist_ok=True)
+ csv_path = data_dir / "local_sample.csv"
+ with open(csv_path, "w", newline="") as f:
+ writer = csv.writer(f)
+ writer.writerow(["time", "commit", "metric1", "metric2"])
+ writer.writerows(data_points)
+
+ # write otava.yaml in temp cwd
+ config_path = td_path / "otava.yaml"
+ config_path.write_text(config_content, encoding="utf-8")
+
+ # run command
+ cmd = ["uv", "run", "otava", "regressions", "local.sample"]
+ proc = subprocess.run(
+ cmd,
+ cwd=str(td_path),
+ capture_output=True,
+ text=True,
+ timeout=120,
+ env=dict(os.environ, OTAVA_CONFIG=config_path),
+ )
+
+ if proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {cmd!r}\n"
+ f"Exit code: {proc.returncode}\n\n"
+ f"Stdout:\n{proc.stdout}\n\n"
+ f"Stderr:\n{proc.stderr}\n"
+ )
+
+ # Python 3.9 and earlier does not print it for some reason...
+ output_without_log = proc.stdout.replace(
+ "Computing change points for test local.sample...", ""
+ )
+ # Python 3.9 complains about
importlib.metadata.packages_distributions...
+ output_without_log = output_without_log.replace(
+ "An error occurred: module 'importlib.metadata' has no attribute
'packages_distributions'",
+ "",
+ )
+ assert _remove_trailing_whitespaces(output_without_log) ==
expected_output.rstrip("\n")
+
+
+def _remove_trailing_whitespaces(s: str) -> str:
+ return "\n".join(line.rstrip() for line in s.splitlines()).strip()
diff --git a/tests/postgres_e2e_test.py b/tests/postgres_e2e_test.py
new file mode 100644
index 0000000..e14de8d
--- /dev/null
+++ b/tests/postgres_e2e_test.py
@@ -0,0 +1,415 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shutil
+import socket
+import subprocess
+import textwrap
+import time
+from contextlib import contextmanager
+from pathlib import Path
+
+import pytest
+
+
+def test_analyze():
+ """
+ End-to-end test for the PostgreSQL example.
+
+    Starts a PostgreSQL container seeded from examples/postgresql/init-db,
+    waits for Postgres to be ready, runs the otava analysis locally via
+    "uv run", and compares stdout to the expected output (seeded data uses
+    deterministic 2025 timestamps).
+    """
+ with postgres_container() as (postgres_container_id, host_port):
+ # Run the Otava analysis
+ proc = subprocess.run(
+ ["uv", "run", "otava", "analyze", "aggregate_mem"],
+ capture_output=True,
+ text=True,
+ timeout=600,
+ env=dict(
+ os.environ,
+ OTAVA_CONFIG=Path("examples/postgresql/config/otava.yaml"),
+ POSTGRES_HOSTNAME="localhost",
+ POSTGRES_PORT=host_port,
+ POSTGRES_USERNAME="exampleuser",
+ POSTGRES_PASSWORD="examplepassword",
+ POSTGRES_DATABASE="benchmark_results",
+ BRANCH="trunk",
+ ),
+ )
+
+ if proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {proc.args!r}\n"
+ f"Exit code: {proc.returncode}\n\n"
+ f"Stdout:\n{proc.stdout}\n\n"
+ f"Stderr:\n{proc.stderr}\n"
+ )
+
+ expected_output = textwrap.dedent(
+ """\
+time experiment_id commit config_id
process_cumulative_rate_mean process_cumulative_rate_stderr
process_cumulative_rate_diff
+------------------------- ------------------ -------- -----------
------------------------------ --------------------------------
------------------------------
+2025-03-13 10:03:02 +0000 aggregate-36e5ccd2 36e5ccd2 1
61160 2052
13558
+2025-03-25 10:03:02 +0000 aggregate-d5460f38 d5460f38 1
60160 2142
13454
+2025-04-02 10:03:02 +0000 aggregate-bc9425cb bc9425cb 1
60960 2052
13053
+
······························
+
-5.6%
+
······························
+2025-04-06 10:03:02 +0000 aggregate-14df1b11 14df1b11 1
57123 2052
14052
+2025-04-13 10:03:02 +0000 aggregate-ac40c0d8 ac40c0d8 1
57980 2052
13521
+2025-04-27 10:03:02 +0000 aggregate-0af4ccbc 0af4ccbc 1
56950 2052
13532
+ """
+ )
+ assert _remove_trailing_whitespaces(proc.stdout) ==
expected_output.rstrip("\n")
+
+ # Verify the DB was updated with the detected change.
+ # Query the updated change metric at the detected change point.
+ query_proc = subprocess.run(
+ [
+ "docker",
+ "exec",
+ postgres_container_id,
+ "psql",
+ "-U",
+ "exampleuser",
+ "-d",
+ "benchmark_results",
+ "-Atc",
+ """
+ SELECT
+ process_cumulative_rate_mean_rel_forward_change,
+ process_cumulative_rate_mean_rel_backward_change,
+ process_cumulative_rate_mean_p_value
+ FROM results
+ WHERE experiment_id='aggregate-14df1b11' AND config_id=1;
+ """,
+ ],
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+ if query_proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {query_proc.args!r}\n"
+ f"Exit code: {query_proc.returncode}\n\n"
+ f"Stdout:\n{query_proc.stdout}\n\n"
+ f"Stderr:\n{query_proc.stderr}\n"
+ )
+
+        # psql -Atc returns one row like: forward_change|backward_change|p_value
+ forward_change, backward_change, p_value =
query_proc.stdout.strip().split("|")
+ # --update-postgres was not specified, so no change point should be
recorded
+ assert forward_change == backward_change == p_value == ""
+
+
+def test_analyze_and_update_postgres():
+ """
+ End-to-end test for the PostgreSQL example.
+
+    Starts a PostgreSQL container seeded from examples/postgresql/init-db,
+    waits for Postgres to be ready, runs the otava analysis locally via
+    "uv run" with --update-postgres, compares stdout to the expected output
+    (seeded data uses deterministic 2025 timestamps), and verifies the
+    detected change point was written back to the database.
+    """
+ with postgres_container() as (postgres_container_id, host_port):
+ # Run the Otava analysis
+ proc = subprocess.run(
+ ["uv", "run", "otava", "analyze", "aggregate_mem",
"--update-postgres"],
+ capture_output=True,
+ text=True,
+ timeout=600,
+ env=dict(
+ os.environ,
+ OTAVA_CONFIG=Path("examples/postgresql/config/otava.yaml"),
+ POSTGRES_HOSTNAME="localhost",
+ POSTGRES_PORT=host_port,
+ POSTGRES_USERNAME="exampleuser",
+ POSTGRES_PASSWORD="examplepassword",
+ POSTGRES_DATABASE="benchmark_results",
+ BRANCH="trunk",
+ ),
+ )
+
+ if proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {proc.args!r}\n"
+ f"Exit code: {proc.returncode}\n\n"
+ f"Stdout:\n{proc.stdout}\n\n"
+ f"Stderr:\n{proc.stderr}\n"
+ )
+
+ expected_output = textwrap.dedent(
+ """\
+ time experiment_id commit config_id
process_cumulative_rate_mean process_cumulative_rate_stderr
process_cumulative_rate_diff
+ ------------------------- ------------------ -------- -----------
------------------------------ --------------------------------
------------------------------
+ 2025-03-13 10:03:02 +0000 aggregate-36e5ccd2 36e5ccd2 1
61160 2052
13558
+ 2025-03-25 10:03:02 +0000 aggregate-d5460f38 d5460f38 1
60160 2142
13454
+ 2025-04-02 10:03:02 +0000 aggregate-bc9425cb bc9425cb 1
60960 2052
13053
+
······························
+
-5.6%
+
······························
+ 2025-04-06 10:03:02 +0000 aggregate-14df1b11 14df1b11 1
57123 2052
14052
+ 2025-04-13 10:03:02 +0000 aggregate-ac40c0d8 ac40c0d8 1
57980 2052
13521
+ 2025-04-27 10:03:02 +0000 aggregate-0af4ccbc 0af4ccbc 1
56950 2052
13532
+ """
+ )
+ assert _remove_trailing_whitespaces(proc.stdout) ==
expected_output.rstrip("\n")
+
+ # Verify the DB was updated with the detected change.
+ # Query the updated change metric at the detected change point.
+ query_proc = subprocess.run(
+ [
+ "docker",
+ "exec",
+ postgres_container_id,
+ "psql",
+ "-U",
+ "exampleuser",
+ "-d",
+ "benchmark_results",
+ "-Atc",
+ """
+ SELECT
+ process_cumulative_rate_mean_rel_forward_change,
+ process_cumulative_rate_mean_rel_backward_change,
+ process_cumulative_rate_mean_p_value
+ FROM results
+ WHERE experiment_id='aggregate-14df1b11' AND config_id=1;
+ """,
+ ],
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+ if query_proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {query_proc.args!r}\n"
+ f"Exit code: {query_proc.returncode}\n\n"
+ f"Stdout:\n{query_proc.stdout}\n\n"
+ f"Stderr:\n{query_proc.stderr}\n"
+ )
+
+        # psql -Atc returns one row like: forward_change|backward_change|p_value
+ forward_change, backward_change, p_value =
query_proc.stdout.strip().split("|")
+ forward_change = float(forward_change)
+ backward_change = float(backward_change)
+ p_value = float(p_value)
+
+ if abs(forward_change - (-5.6)) > 0.2:
+ pytest.fail(f"DB change value {forward_change!r} not within
tolerance of -5.6")
+ if abs(backward_change - 5.94) > 0.2:
+ pytest.fail(f"DB backward change {backward_change!r} not within
tolerance of 5.94")
+ if p_value >= 0.001:
+ pytest.fail(f"DB p-value {p_value!r} not less than 0.01")
+
+
+def test_regressions():
+ """
+ End-to-end test for the PostgreSQL regressions command.
+
+    Starts a PostgreSQL container seeded from examples/postgresql/init-db,
+    waits for Postgres to be ready, runs the otava regressions command
+    locally via "uv run", and compares stdout to the expected output.
+    """
+ with postgres_container() as (postgres_container_id, host_port):
+ # Run the Otava regressions command
+ proc = subprocess.run(
+ ["uv", "run", "otava", "regressions", "aggregate_mem"],
+ capture_output=True,
+ text=True,
+ timeout=600,
+ env=dict(
+ os.environ,
+ OTAVA_CONFIG=Path("examples/postgresql/config/otava.yaml"),
+ POSTGRES_HOSTNAME="localhost",
+ POSTGRES_PORT=host_port,
+ POSTGRES_USERNAME="exampleuser",
+ POSTGRES_PASSWORD="examplepassword",
+ POSTGRES_DATABASE="benchmark_results",
+ BRANCH="trunk",
+ ),
+ )
+
+ if proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {proc.args!r}\n"
+ f"Exit code: {proc.returncode}\n\n"
+ f"Stdout:\n{proc.stdout}\n\n"
+ f"Stderr:\n{proc.stderr}\n"
+ )
+
+ expected_output = textwrap.dedent(
+ """\
+ aggregate_mem:
+ process_cumulative_rate_mean: 6.08e+04 --> 5.74e+04 ( -5.6%)
+ Regressions in 1 test found
+ """
+ )
+ assert proc.stdout == expected_output
+
+ # Verify the DB was NOT updated since --update-postgres was not
specified
+ query_proc = subprocess.run(
+ [
+ "docker",
+ "exec",
+ postgres_container_id,
+ "psql",
+ "-U",
+ "exampleuser",
+ "-d",
+ "benchmark_results",
+ "-Atc",
+ """
+ SELECT
+ process_cumulative_rate_mean_rel_forward_change,
+ process_cumulative_rate_mean_rel_backward_change,
+ process_cumulative_rate_mean_p_value
+ FROM results
+ WHERE experiment_id='aggregate-14df1b11' AND config_id=1;
+ """,
+ ],
+ capture_output=True,
+ text=True,
+ timeout=60,
+ )
+ if query_proc.returncode != 0:
+ pytest.fail(
+ "Command returned non-zero exit code.\n\n"
+ f"Command: {query_proc.args!r}\n"
+ f"Exit code: {query_proc.returncode}\n\n"
+ f"Stdout:\n{query_proc.stdout}\n\n"
+ f"Stderr:\n{query_proc.stderr}\n"
+ )
+
+        # psql -Atc returns one row like: forward_change|backward_change|p_value
+ forward_change, backward_change, p_value =
query_proc.stdout.strip().split("|")
+ # --update-postgres was not specified, so no change point should be
recorded
+ assert forward_change == backward_change == p_value == ""
+
+
+@contextmanager
+def postgres_container():
+ """
+ Context manager for running a PostgreSQL container.
+ Yields the container ID and ensures cleanup on exit.
+ """
+ if not shutil.which("docker"):
+ pytest.fail("docker is not available on PATH")
+
+ container_id = None
+ try:
+ # Start postgres container
+ cmd = [
+ "docker",
+ "run",
+ "-d",
+ "--env",
+ "POSTGRES_USER=exampleuser",
+ "--env",
+ "POSTGRES_PASSWORD=examplepassword",
+ "--env",
+ "POSTGRES_DB=benchmark_results",
+ "--volume",
+
f"{Path('examples/postgresql/init-db').resolve()}:/docker-entrypoint-initdb.d",
+ "--publish",
+ "5432",
+ "postgres:latest",
+ ]
+ proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+ if proc.returncode != 0:
+ pytest.fail(
+ "Docker command returned non-zero exit code.\n\n"
+ f"Command: {cmd!r}\n"
+ f"Exit code: {proc.returncode}\n\n"
+ f"Stdout:\n{proc.stdout}\n\n"
+ f"Stderr:\n{proc.stderr}\n"
+ )
+ container_id = proc.stdout.strip()
+ # Determine the randomly assigned host port for 5432/tcp
+ inspect_cmd = [
+ "docker",
+ "inspect",
+ "-f",
+ '{{ (index (index .NetworkSettings.Ports "5432/tcp") 0).HostPort
}}',
+ container_id,
+ ]
+ inspect_proc = subprocess.run(inspect_cmd, capture_output=True,
text=True, timeout=60)
+ if inspect_proc.returncode != 0:
+ pytest.fail(
+ "Docker inspect returned non-zero exit code.\n\n"
+ f"Command: {inspect_cmd!r}\n"
+ f"Exit code: {inspect_proc.returncode}\n\n"
+ f"Stdout:\n{inspect_proc.stdout}\n\n"
+ f"Stderr:\n{inspect_proc.stderr}\n"
+ )
+ host_port = inspect_proc.stdout.strip()
+
+ # Wait until Postgres responds
+ deadline = time.time() + 60
+ ready = False
+ while time.time() < deadline:
+ # First ensure the assigned host port accepts TCP connections
+ try:
+ with socket.create_connection(("localhost", int(host_port)),
timeout=1):
+ port_ready = True
+ except OSError:
+ port_ready = False
+ continue
+
+ # Then check pg_isready inside the container
+ cmd = [
+ "docker",
+ "exec",
+ container_id,
+ "pg_isready",
+ "-U",
+ "exampleuser",
+ "-d",
+ "benchmark_results",
+ ]
+ proc = subprocess.run(cmd, capture_output=True, text=True)
+ if port_ready and proc.returncode == 0:
+ ready = True
+ break
+ time.sleep(1)
+
+ if not ready:
+ pytest.fail("Postgres did not become ready within timeout.")
+
+ yield container_id, host_port
+ finally:
+ if container_id:
+ res = subprocess.run(
+ ["docker", "stop", container_id], capture_output=True,
text=True, timeout=60
+ )
+ if res.returncode != 0:
+ pytest.fail(
+ f"Docker command returned non-zero exit code:
{res.returncode}\nStdout: {res.stdout}\nStderr: {res.stderr}"
+ )
+
+
+def _remove_trailing_whitespaces(s: str) -> str:
+ return "\n".join(line.rstrip() for line in s.splitlines())
diff --git a/uv.lock b/uv.lock
index f2b2833..fb4d970 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
version = 1
-revision = 3
+revision = 2
requires-python = ">=3.8, <3.11"
resolution-markers = [
"python_full_version >= '3.10'",
@@ -10,10 +10,9 @@ resolution-markers = [
[[package]]
name = "apache-otava"
-version = "0.7.0"
+version = "0.6.1"
source = { editable = "." }
dependencies = [
- { name = "configargparse" },
{ name = "dateparser" },
{ name = "expandvars" },
{ name = "google-cloud-bigquery", version = "3.30.0", source = { registry
= "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" },
@@ -54,9 +53,8 @@ dev = [
[package.metadata]
requires-dist = [
{ name = "autoflake", marker = "extra == 'dev'", specifier = ">=1.4" },
- { name = "configargparse", specifier = ">=1.7.1" },
{ name = "dateparser", specifier = ">=1.0.0" },
- { name = "expandvars", specifier = ">=0.12.0" },
+ { name = "expandvars", specifier = ">=0.6.5" },
{ name = "flake8", marker = "extra == 'dev'", specifier = ">=4.0.1" },
{ name = "google-cloud-bigquery", specifier = ">=3.25.0" },
{ name = "isort", marker = "extra == 'dev'", specifier = ">=5.10.1" },
@@ -220,15 +218,6 @@ wheels = [
{ url =
"https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl",
hash =
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size
= 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
-[[package]]
-name = "configargparse"
-version = "1.7.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url =
"https://files.pythonhosted.org/packages/85/4d/6c9ef746dfcc2a32e26f3860bb4a011c008c392b83eabdfb598d1a8bbe5d/configargparse-1.7.1.tar.gz",
hash =
"sha256:79c2ddae836a1e5914b71d58e4b9adbd9f7779d4e6351a637b7d2d9b6c46d3d9", size
= 43958, upload-time = "2025-05-23T14:26:17.369Z" }
-wheels = [
- { url =
"https://files.pythonhosted.org/packages/31/28/d28211d29bcc3620b1fece85a65ce5bb22f18670a03cd28ea4b75ede270c/configargparse-1.7.1-py3-none-any.whl",
hash =
"sha256:8b586a31f9d873abd1ca527ffbe58863c99f36d896e2829779803125e83be4b6", size
= 25607, upload-time = "2025-05-23T14:26:15.923Z" },
-]
-
[[package]]
name = "dateparser"
version = "1.2.0"
@@ -265,11 +254,11 @@ wheels = [
[[package]]
name = "expandvars"
-version = "1.1.2"
+version = "1.1.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url =
"https://files.pythonhosted.org/packages/9c/64/a9d8ea289d663a44b346203a24bf798507463db1e76679eaa72ee6de1c7a/expandvars-1.1.2.tar.gz",
hash =
"sha256:6c5822b7b756a99a356b915dd1267f52ab8a4efaa135963bd7f4bd5d368f71d7", size
= 70842, upload-time = "2025-09-12T10:55:20.929Z" }
+sdist = { url =
"https://files.pythonhosted.org/packages/dc/c9/c0a46f462058446aafe953bf76a957c17f78550216a95fbded2270f83117/expandvars-1.1.1.tar.gz",
hash =
"sha256:98add8268b760dfee457bde1c17bf745795fdebc22b7ddab75fd3278653f1e05", size
= 70787, upload-time = "2025-07-12T07:46:22.308Z" }
wheels = [
- { url =
"https://files.pythonhosted.org/packages/7f/e6/79c43f7a55264e479a9fbf21ddba6a73530b3ea8439a8bb7fa5a281721af/expandvars-1.1.2-py3-none-any.whl",
hash =
"sha256:d1652fe4e61914f5b88ada93aaedb396446f55ae4621de45c8cb9f66e5712526", size
= 7526, upload-time = "2025-09-12T10:55:18.779Z" },
+ { url =
"https://files.pythonhosted.org/packages/2b/ca/0753ba3a81255ac49748ec8b665ab01f8efcf711f74bbccb5457a6193acc/expandvars-1.1.1-py3-none-any.whl",
hash =
"sha256:09ca39e6bfcb0d899db8778a00dd3d89cfeb0080795c54f16f6279afd0ef8c5b", size
= 7522, upload-time = "2025-07-12T07:46:18.984Z" },
]
[[package]]
@@ -718,7 +707,7 @@ resolution-markers = [
"python_full_version == '3.9.*'",
]
dependencies = [
- { name = "zipp", version = "3.23.0", source = { registry =
"https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" },
+ { name = "zipp", version = "3.23.0", source = { registry =
"https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" },
]
sdist = { url =
"https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz",
hash =
"sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size
= 56641, upload-time = "2025-04-27T15:29:01.736Z" }
wheels = [