From 535b2eed92a5010ea02ba7e04de03c6541f91f20 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 12:29:02 +0800 Subject: [PATCH 01/29] feat: add GARM ROCK image spec and build script Builds GARM v0.2.1 and garm-provider-openstack v0.1.5 from source. Version tags are annotated for Renovate to track GitHub releases. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- build-garm-rock.sh | 13 +++++++++++++ garm-rockcraft.yaml | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100755 build-garm-rock.sh create mode 100644 garm-rockcraft.yaml diff --git a/build-garm-rock.sh b/build-garm-rock.sh new file mode 100755 index 00000000..a435dc24 --- /dev/null +++ b/build-garm-rock.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. +# + +export ROCKCRAFT_ENABLE_EXPERIMENTAL_EXTENSIONS=true + +ln -s ./garm-rockcraft.yaml ./rockcraft.yaml +rockcraft clean +rockcraft pack +rm ./rockcraft.yaml diff --git a/garm-rockcraft.yaml b/garm-rockcraft.yaml new file mode 100644 index 00000000..f0fc2568 --- /dev/null +++ b/garm-rockcraft.yaml @@ -0,0 +1,41 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +name: garm +base: bare +build-base: ubuntu@24.04 +version: "0.1" +summary: GARM — GitHub Actions Runner Manager +description: | + ROCK image containing the GARM binary and the OpenStack provider binary. 
+platforms: + amd64: + +parts: + garm: + plugin: go + source: https://github.com/cloudbase/garm + source-type: git + # renovate: datasource=github-releases depName=cloudbase/garm + source-tag: v0.2.1 + build-snaps: + - go/1.24/stable + override-build: | + cd "$CRAFT_PART_SRC" + go build -o bin/garm ./cmd/garm-server + mkdir -p "$CRAFT_PART_INSTALL/usr/local/bin" + cp bin/garm "$CRAFT_PART_INSTALL/usr/local/bin/" + + garm-provider-openstack: + plugin: go + source: https://github.com/cloudbase/garm-provider-openstack + source-type: git + # renovate: datasource=github-releases depName=cloudbase/garm-provider-openstack + source-tag: v0.1.5 + build-snaps: + - go/1.24/stable + override-build: | + cd "$CRAFT_PART_SRC" + go build -o bin/garm-provider-openstack . + mkdir -p "$CRAFT_PART_INSTALL/usr/local/bin" + cp bin/garm-provider-openstack "$CRAFT_PART_INSTALL/usr/local/bin/" From 90d439a8383070ad67594eb5416a90845d2feb84 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 12:34:39 +0800 Subject: [PATCH 02/29] fix: correct GARM build path and add SQLite build tags - Use ./cmd/garm (not ./cmd/garm-server) as the main package path - Add osusergo,netgo,sqlite_omit_load_extension build tags so the statically-linked binary works inside the bare base image Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- garm-rockcraft.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garm-rockcraft.yaml b/garm-rockcraft.yaml index f0fc2568..88fd2353 100644 --- a/garm-rockcraft.yaml +++ b/garm-rockcraft.yaml @@ -22,7 +22,7 @@ parts: - go/1.24/stable override-build: | cd "$CRAFT_PART_SRC" - go build -o bin/garm ./cmd/garm-server + go build -tags osusergo,netgo,sqlite_omit_load_extension -o bin/garm ./cmd/garm mkdir -p "$CRAFT_PART_INSTALL/usr/local/bin" cp bin/garm "$CRAFT_PART_INSTALL/usr/local/bin/" From 204e04e2951d4393865a769d6482962981ea4d04 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 12:36:26 +0800 Subject: 
[PATCH 03/29] feat: scaffold garm-operator charm metadata and tooling Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/charmcraft.yaml | 35 +++++++++++++++++++ charms/garm-operator/pyproject.toml | 41 +++++++++++++++++++++++ charms/garm-operator/requirements.txt | 4 +++ charms/garm-operator/src/.gitkeep | 0 charms/garm-operator/tox.toml | 48 +++++++++++++++++++++++++++ 5 files changed, 128 insertions(+) create mode 100644 charms/garm-operator/charmcraft.yaml create mode 100644 charms/garm-operator/pyproject.toml create mode 100644 charms/garm-operator/requirements.txt create mode 100644 charms/garm-operator/src/.gitkeep create mode 100644 charms/garm-operator/tox.toml diff --git a/charms/garm-operator/charmcraft.yaml b/charms/garm-operator/charmcraft.yaml new file mode 100644 index 00000000..7cbd1385 --- /dev/null +++ b/charms/garm-operator/charmcraft.yaml @@ -0,0 +1,35 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +name: github-runner-garm + +type: charm + +base: ubuntu@24.04 + +platforms: + amd64: + +summary: GARM — GitHub Actions Runner Manager charm. + +description: | + Deploys and manages GARM (GitHub Actions Runner Manager) using the + 12-factor app framework with SQLite storage and an OpenStack runner provider. + +extensions: + - go-framework + +config: + options: + garm-listen-address: + type: string + default: "0.0.0.0" + description: Address GARM API server listens on. + garm-listen-port: + type: int + default: 9997 + description: Port GARM API server listens on. + garm-db-path: + type: string + default: "/srv/garm/data/garm.db" + description: Path to the SQLite database file. 
diff --git a/charms/garm-operator/pyproject.toml b/charms/garm-operator/pyproject.toml new file mode 100644 index 00000000..660ac999 --- /dev/null +++ b/charms/garm-operator/pyproject.toml @@ -0,0 +1,41 @@ +# Testing tools configuration +[tool.coverage.run] +branch = true + +[tool.coverage.report] +show_missing = true + +[tool.pytest.ini_options] +minversion = "6.0" +log_cli_level = "INFO" + +# Linting tools configuration +[tool.ruff] +line-length = 99 +lint.select = ["E", "W", "F", "C", "N", "D", "I001"] +lint.ignore = [ + "D105", + "D107", + "D203", + "D204", + "D213", + "D215", + "D400", + "D404", + "D406", + "D407", + "D408", + "D409", + "D413", +] +extend-exclude = ["__pycache__", "*.egg_info"] +lint.per-file-ignores = {"tests/*" = ["D100","D101","D102","D103","D104"]} + +[tool.ruff.lint.mccabe] +max-complexity = 10 + +[tool.codespell] +skip = "build,lib,venv,icon.svg,.tox,.git,.mypy_cache,.ruff_cache,.coverage" + +[tool.pyright] +include = ["src/**.py"] diff --git a/charms/garm-operator/requirements.txt b/charms/garm-operator/requirements.txt new file mode 100644 index 00000000..efbc780f --- /dev/null +++ b/charms/garm-operator/requirements.txt @@ -0,0 +1,4 @@ +--only-binary=pluggy +ops==3.7.0 +paas-charm==1.11.2 +tomli-w==1.2.0 diff --git a/charms/garm-operator/src/.gitkeep b/charms/garm-operator/src/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/charms/garm-operator/tox.toml b/charms/garm-operator/tox.toml new file mode 100644 index 00000000..4c32b1f7 --- /dev/null +++ b/charms/garm-operator/tox.toml @@ -0,0 +1,48 @@ +env_list = ["lint", "complexity", "static", "unit", "coverage-report"] + +[env_run_base] +package = "skip" +set_env = { PYTHONPATH = "{tox_root}/src:{tox_root}/tests", PYTHONBREAKPOINT = "pdb.set_trace", PY_COLORS = "1" } +pass_env = ["PYTHONPATH", "CHARM_BUILD_DIR", "MODEL_SETTINGS"] + +[env.lint] +description = "Check code against coding style standards" +deps = ["ruff", "codespell"] +commands = [ + ["codespell", 
"{tox_root}"], + ["ruff", "check", "src"], + ["ruff", "format", "--check", "--diff", "src"], +] + +[env.complexity] +description = "Check cyclomatic complexity" +deps = ["ruff"] +commands = [["ruff", "check", "--select", "C90", "src"]] + +[env.static] +description = "Run static type checks" +deps = ["pyright", "-r requirements.txt"] +commands = [["pyright"]] + +[env.unit] +description = "Run unit tests" +deps = ["pytest", "coverage[toml]", "-r requirements.txt"] +commands = [ + [ + "coverage", + "run", + "--source=src", + "-m", + "pytest", + "--tb", + "native", + "-v", + "-s", + { replace = "posargs", default = ["tests/unit"], extend = true }, + ], +] + +[env.coverage-report] +description = "Report coverage results" +deps = ["coverage[toml]"] +commands = [["coverage", "report"]] From 503af21cfa7c663f089f516f1c20ce42716f11c0 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 12:41:04 +0800 Subject: [PATCH 04/29] fix: correct garm-operator scaffold issues - Add lib/ to PYTHONPATH in tox.toml (needed when charm libraries are vendored) - Add .gitignore copied from planner-operator - Add tests/unit/.gitkeep so the directory is tracked by git Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/.gitignore | 9 +++++++++ charms/garm-operator/tests/unit/.gitkeep | 0 charms/garm-operator/tox.toml | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 charms/garm-operator/.gitignore create mode 100644 charms/garm-operator/tests/unit/.gitkeep diff --git a/charms/garm-operator/.gitignore b/charms/garm-operator/.gitignore new file mode 100644 index 00000000..a26d707f --- /dev/null +++ b/charms/garm-operator/.gitignore @@ -0,0 +1,9 @@ +venv/ +build/ +*.charm +.tox/ +.coverage +__pycache__/ +*.py[cod] +.idea +.vscode/ diff --git a/charms/garm-operator/tests/unit/.gitkeep b/charms/garm-operator/tests/unit/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/charms/garm-operator/tox.toml 
b/charms/garm-operator/tox.toml index 4c32b1f7..b6c20550 100644 --- a/charms/garm-operator/tox.toml +++ b/charms/garm-operator/tox.toml @@ -2,7 +2,7 @@ env_list = ["lint", "complexity", "static", "unit", "coverage-report"] [env_run_base] package = "skip" -set_env = { PYTHONPATH = "{tox_root}/src:{tox_root}/tests", PYTHONBREAKPOINT = "pdb.set_trace", PY_COLORS = "1" } +set_env = { PYTHONPATH = "{tox_root}/lib:{tox_root}/src:{tox_root}/tests", PYTHONBREAKPOINT = "pdb.set_trace", PY_COLORS = "1" } pass_env = ["PYTHONPATH", "CHARM_BUILD_DIR", "MODEL_SETTINGS"] [env.lint] From b728d12e405e570874d670f479e85fbe5a8bde01 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 13:11:25 +0800 Subject: [PATCH 05/29] feat: add GARM TOML rendering function with tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/src/.gitkeep | 0 charms/garm-operator/src/charm.py | 76 +++++++++++++ charms/garm-operator/tests/unit/.gitkeep | 0 charms/garm-operator/tests/unit/test_charm.py | 105 ++++++++++++++++++ charms/garm-operator/tox.toml | 2 +- 5 files changed, 182 insertions(+), 1 deletion(-) delete mode 100644 charms/garm-operator/src/.gitkeep create mode 100644 charms/garm-operator/src/charm.py delete mode 100644 charms/garm-operator/tests/unit/.gitkeep create mode 100644 charms/garm-operator/tests/unit/test_charm.py diff --git a/charms/garm-operator/src/.gitkeep b/charms/garm-operator/src/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py new file mode 100644 index 00000000..7e591229 --- /dev/null +++ b/charms/garm-operator/src/charm.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. 
+ +"""GARM charm entrypoint.""" + +import logging +import typing + +import ops +import tomli_w + +logger = logging.getLogger(__name__) + +GARM_CONFIG_PATH: typing.Final[str] = "/srv/garm/config/config.toml" +GARM_SECRETS_LABEL: typing.Final[str] = "garm-secrets" +CONTAINER_NAME: typing.Final[str] = "app" +PEBBLE_SERVICE_NAME: typing.Final[str] = "app" +GARM_BINARY: typing.Final[str] = "/usr/local/bin/garm" +OPENSTACK_PROVIDER_BINARY: typing.Final[str] = "/usr/local/bin/garm-provider-openstack" + + +def render_garm_toml( + *, + listen_address: str, + listen_port: int, + db_path: str, + jwt_secret: str, +) -> str: + """Render GARM's TOML configuration file content. + + Args: + listen_address: IP address for the GARM API server to bind on. + listen_port: Port for the GARM API server. + db_path: Filesystem path to the SQLite database file. + jwt_secret: Secret string used to sign GARM JWT tokens. + + Returns: + TOML-formatted string ready to be written to disk. + """ + config: dict[str, typing.Any] = { + "database": { + "backend": "sqlite3", + "sqlite3": {"db_file": db_path}, + }, + "apiserver": { + "bind": listen_address, + "port": listen_port, + "use_tls": False, + }, + "jwt_auth": { + "secret": jwt_secret, + "time_to_live": "8760h", + }, + "metrics": { + "disable_auth": True, + "enable": True, + }, + "provider": [ + { + "name": "openstack", + "provider_type": "external", + "description": "OpenStack provider", + "external": { + "config_file": "", + "provider_executable": OPENSTACK_PROVIDER_BINARY, + "environment_variables": [], + }, + } + ], + } + return tomli_w.dumps(config) + + +if __name__ == "__main__": + ops.main(GarmCharm) # type: ignore[name-defined] # GarmCharm defined in Task 4 diff --git a/charms/garm-operator/tests/unit/.gitkeep b/charms/garm-operator/tests/unit/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/charms/garm-operator/tests/unit/test_charm.py b/charms/garm-operator/tests/unit/test_charm.py new file mode 100644 index 
00000000..c9579ddf --- /dev/null +++ b/charms/garm-operator/tests/unit/test_charm.py @@ -0,0 +1,105 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +"""Unit tests for GarmCharm.""" + +try: + import tomllib +except ImportError: + import tomli as tomllib # type: ignore[no-redef] + +import pytest + +from charm import render_garm_toml + + +def test_render_garm_toml_database_section(): + """ + arrange: Provide a db path. + act: Render the GARM TOML config. + assert: The [database] section uses sqlite3 with the given path. + """ + result = render_garm_toml( + listen_address="0.0.0.0", + listen_port=9997, + db_path="/srv/garm/data/garm.db", + jwt_secret="abc123", + ) + parsed = tomllib.loads(result) + assert parsed["database"]["backend"] == "sqlite3" + assert parsed["database"]["sqlite3"]["db_file"] == "/srv/garm/data/garm.db" + + +def test_render_garm_toml_apiserver_section(): + """ + arrange: Provide listen address and port. + act: Render the GARM TOML config. + assert: The [apiserver] section reflects the given address and port. + """ + result = render_garm_toml( + listen_address="127.0.0.1", + listen_port=8080, + db_path="/srv/garm/data/garm.db", + jwt_secret="abc123", + ) + parsed = tomllib.loads(result) + assert parsed["apiserver"]["bind"] == "127.0.0.1" + assert parsed["apiserver"]["port"] == 8080 + assert parsed["apiserver"]["use_tls"] is False + + +def test_render_garm_toml_jwt_auth_section(): + """ + arrange: Provide a jwt_secret. + act: Render the GARM TOML config. + assert: The [jwt_auth] section contains the secret. + """ + result = render_garm_toml( + listen_address="0.0.0.0", + listen_port=9997, + db_path="/srv/garm/data/garm.db", + jwt_secret="mysecret", + ) + parsed = tomllib.loads(result) + assert parsed["jwt_auth"]["secret"] == "mysecret" + assert parsed["jwt_auth"]["time_to_live"] == "8760h" + + +def test_render_garm_toml_metrics_section(): + """ + arrange: Any valid config inputs. + act: Render the GARM TOML config. 
+ assert: The [metrics] section disables auth and enables metrics. + """ + result = render_garm_toml( + listen_address="0.0.0.0", + listen_port=9997, + db_path="/srv/garm/data/garm.db", + jwt_secret="abc123", + ) + parsed = tomllib.loads(result) + assert parsed["metrics"]["disable_auth"] is True + assert parsed["metrics"]["enable"] is True + + +def test_render_garm_toml_provider_section(): + """ + arrange: Any valid config inputs. + act: Render the GARM TOML config. + assert: The [[provider]] section has the OpenStack provider binary. + """ + result = render_garm_toml( + listen_address="0.0.0.0", + listen_port=9997, + db_path="/srv/garm/data/garm.db", + jwt_secret="abc123", + ) + parsed = tomllib.loads(result) + assert len(parsed["provider"]) == 1 + provider = parsed["provider"][0] + assert provider["name"] == "openstack" + assert provider["provider_type"] == "external" + assert ( + provider["external"]["provider_executable"] + == "/usr/local/bin/garm-provider-openstack" + ) diff --git a/charms/garm-operator/tox.toml b/charms/garm-operator/tox.toml index b6c20550..8f252767 100644 --- a/charms/garm-operator/tox.toml +++ b/charms/garm-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["pytest", "coverage[toml]", "-r requirements.txt"] +deps = ["pytest", "coverage[toml]", "tomli", "-r requirements.txt"] commands = [ [ "coverage", From f8424677307582722008fc816a4588bfb3b18f3e Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 13:37:03 +0800 Subject: [PATCH 06/29] fix: address code quality issues in TOML rendering - Remove forward reference to undefined GarmCharm (fixes F821) - Add comments for scaffold placeholder values in provider config - Use Python version marker for tomli test dep Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/src/charm.py | 6 +++--- charms/garm-operator/tox.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff 
--git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py index 7e591229..464a3fdb 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm-operator/src/charm.py @@ -7,7 +7,6 @@ import logging import typing -import ops import tomli_w logger = logging.getLogger(__name__) @@ -62,8 +61,10 @@ def render_garm_toml( "provider_type": "external", "description": "OpenStack provider", "external": { + # Scaffold: OpenStack config path (populated in future story) "config_file": "", "provider_executable": OPENSTACK_PROVIDER_BINARY, + # Scaffold: OpenStack environment variables (populated in future story) "environment_variables": [], }, } @@ -72,5 +73,4 @@ def render_garm_toml( return tomli_w.dumps(config) -if __name__ == "__main__": - ops.main(GarmCharm) # type: ignore[name-defined] # GarmCharm defined in Task 4 +# GarmCharm class and ops.main() entrypoint are added in the next implementation step. diff --git a/charms/garm-operator/tox.toml b/charms/garm-operator/tox.toml index 8f252767..8bd49c05 100644 --- a/charms/garm-operator/tox.toml +++ b/charms/garm-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["pytest", "coverage[toml]", "tomli", "-r requirements.txt"] +deps = ["pytest", "coverage[toml]", "tomli; python_version < '3.11'", "-r requirements.txt"] commands = [ [ "coverage", From c61ac1b14a5f9bc23fbdd4f3a8ac63b7d7e93d39 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 13:52:00 +0800 Subject: [PATCH 07/29] feat: add GarmCharm class with secrets and Pebble integration - _ensure_secrets() creates jwt-secret and db-passphrase on first install - restart() pushes TOML config and overrides Pebble command before replan - _generate_garm_secrets() produces 64-char hex tokens Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../data_platform_libs/v0/data_interfaces.py | 5782 +++++++++++++++++ .../grafana_k8s/v0/grafana_dashboard.py | 2159 ++++++ 
.../lib/charms/loki_k8s/v1/loki_push_api.py | 2534 ++++++++ .../observability_libs/v0/juju_topology.py | 311 + .../prometheus_k8s/v0/prometheus_scrape.py | 1946 ++++++ .../lib/charms/redis_k8s/v0/redis.py | 153 + .../tempo_coordinator_k8s/v0/tracing.py | 1010 +++ .../lib/charms/traefik_k8s/v2/ingress.py | 949 +++ charms/garm-operator/src/charm.py | 98 +- charms/garm-operator/tests/unit/test_charm.py | 34 + 10 files changed, 14975 insertions(+), 1 deletion(-) create mode 100644 charms/garm-operator/lib/charms/data_platform_libs/v0/data_interfaces.py create mode 100644 charms/garm-operator/lib/charms/grafana_k8s/v0/grafana_dashboard.py create mode 100644 charms/garm-operator/lib/charms/loki_k8s/v1/loki_push_api.py create mode 100644 charms/garm-operator/lib/charms/observability_libs/v0/juju_topology.py create mode 100644 charms/garm-operator/lib/charms/prometheus_k8s/v0/prometheus_scrape.py create mode 100644 charms/garm-operator/lib/charms/redis_k8s/v0/redis.py create mode 100644 charms/garm-operator/lib/charms/tempo_coordinator_k8s/v0/tracing.py create mode 100644 charms/garm-operator/lib/charms/traefik_k8s/v2/ingress.py diff --git a/charms/garm-operator/lib/charms/data_platform_libs/v0/data_interfaces.py b/charms/garm-operator/lib/charms/data_platform_libs/v0/data_interfaces.py new file mode 100644 index 00000000..5be1d931 --- /dev/null +++ b/charms/garm-operator/lib/charms/data_platform_libs/v0/data_interfaces.py @@ -0,0 +1,5782 @@ +# Copyright 2023 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Library to manage the relation for the data-platform products. + +This library contains the Requires and Provides classes for handling the relation +between an application and multiple managed application supported by the data-team: +MySQL, Postgresql, MongoDB, Redis, Kafka, and Karapace. + +### Database (MySQL, Postgresql, MongoDB, and Redis) + +#### Requires Charm +This library is a uniform interface to a selection of common database +metadata, with added custom events that add convenience to database management, +and methods to consume the application related data. + + +Following an example of using the DatabaseCreatedEvent, in the context of the +application charm code: + +```python + +from charms.data_platform_libs.v0.data_interfaces import ( + DatabaseCreatedEvent, + DatabaseRequires, + DatabaseEntityCreatedEvent, +) + +class ApplicationCharm(CharmBase): + # Application charm that connects to database charms. + + def __init__(self, *args): + super().__init__(*args) + + # Charm events defined in the database requires charm library. + self.database = DatabaseRequires(self, relation_name="database", database_name="database") + self.framework.observe(self.database.on.database_created, self._on_database_created) + self.framework.observe(self.database.on.database_entity_created, self._on_database_entity_created) + + def _on_database_created(self, event: DatabaseCreatedEvent) -> None: + # Handle the created database + + # Create configuration file for app + config_file = self._render_app_config_file( + event.username, + event.password, + event.endpoints, + ) + + # Start application with rendered configuration + self._start_application(config_file) + + # Set active status + self.unit.status = ActiveStatus("received database credentials") + + def _on_database_entity_created(self, event: DatabaseEntityCreatedEvent) -> None: + # Handle the created entity + ... 
+``` + +As shown above, the library provides some custom events to handle specific situations, +which are listed below: + +- database_created: event emitted when the requested database is created. +- database_entity_created: event emitted when the requested entity is created. +- endpoints_changed: event emitted when the read/write endpoints of the database have changed. +- read_only_endpoints_changed: event emitted when the read-only endpoints of the database + have changed. Event is not triggered if read/write endpoints changed too. + +If it is needed to connect multiple database clusters to the same relation endpoint +the application charm can implement the same code as if it would connect to only +one database cluster (like the above code example). + +To differentiate multiple clusters connected to the same relation endpoint +the application charm can use the name of the remote application: + +```python + +def _on_database_created(self, event: DatabaseCreatedEvent) -> None: + # Get the remote app name of the cluster that triggered this event + cluster = event.relation.app.name +``` + +It is also possible to provide an alias for each different database cluster/relation. + +So, it is possible to differentiate the clusters in two ways. +The first is to use the remote application name, i.e., `event.relation.app.name`, as above. + +The second way is to use different event handlers to handle each cluster events. +The implementation would be something like the following code: + +```python + +from charms.data_platform_libs.v0.data_interfaces import ( + DatabaseCreatedEvent, + DatabaseRequires, +) + +class ApplicationCharm(CharmBase): + # Application charm that connects to database charms. + + def __init__(self, *args): + super().__init__(*args) + + # Define the cluster aliases and one handler for each cluster database created event. 
+ self.database = DatabaseRequires( + self, + relation_name="database", + database_name="database", + relations_aliases = ["cluster1", "cluster2"], + ) + self.framework.observe( + self.database.on.cluster1_database_created, self._on_cluster1_database_created + ) + self.framework.observe( + self.database.on.cluster2_database_created, self._on_cluster2_database_created + ) + + def _on_cluster1_database_created(self, event: DatabaseCreatedEvent) -> None: + # Handle the created database on the cluster named cluster1 + + # Create configuration file for app + config_file = self._render_app_config_file( + event.username, + event.password, + event.endpoints, + ) + ... + + def _on_cluster2_database_created(self, event: DatabaseCreatedEvent) -> None: + # Handle the created database on the cluster named cluster2 + + # Create configuration file for app + config_file = self._render_app_config_file( + event.username, + event.password, + event.endpoints, + ) + ... +``` + +When it's needed to check whether a plugin (extension) is enabled on the PostgreSQL +charm, you can use the is_postgresql_plugin_enabled method. To use that, you need to +add the following dependency to your charmcraft.yaml file: + +```yaml + +parts: + charm: + charm-binary-python-packages: + - psycopg[binary] +``` + +### Provider Charm + +Following an example of using the DatabaseRequestedEvent, in the context of the +database charm code: + +```python +from charms.data_platform_libs.v0.data_interfaces import DatabaseProvides + +class SampleCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + # Charm events defined in the database provides charm library. 
+ self.provided_database = DatabaseProvides(self, relation_name="database") + self.framework.observe(self.provided_database.on.database_requested, + self._on_database_requested) + # Database generic helper + self.database = DatabaseHelper() + + def _on_database_requested(self, event: DatabaseRequestedEvent) -> None: + # Handle the event triggered by a new database requested in the relation + # Retrieve the database name using the charm library. + db_name = event.database + # generate a new user credential + username = self.database.generate_user() + password = self.database.generate_password() + # set the credentials for the relation + self.provided_database.set_credentials(event.relation.id, username, password) + # set other variables for the relation event.set_tls("False") +``` + +As shown above, the library provides a custom event (database_requested) to handle +the situation when an application charm requests a new database to be created. +It's preferred to subscribe to this event instead of relation changed event to avoid +creating a new database when other information other than a database name is +exchanged in the relation databag. + +### Kafka + +This library is the interface to use and interact with the Kafka charm. This library contains +custom events that add convenience to manage Kafka, and provides methods to consume the +application related data. 
+ +#### Requirer Charm + +```python + +from charms.data_platform_libs.v0.data_interfaces import ( + BootstrapServerChangedEvent, + KafkaRequires, + TopicCreatedEvent, + TopicEntityCreatedEvent, +) + +class ApplicationCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + self.kafka = KafkaRequires(self, "kafka_client", "test-topic") + self.framework.observe( + self.kafka.on.bootstrap_server_changed, self._on_kafka_bootstrap_server_changed + ) + self.framework.observe( + self.kafka.on.topic_created, self._on_kafka_topic_created + ) + self.framework.observe( + self.kafka.on.topic_entity_created, self._on_kafka_topic_entity_created + ) + + def _on_kafka_bootstrap_server_changed(self, event: BootstrapServerChangedEvent): + # Event triggered when a bootstrap server was changed for this application + + new_bootstrap_server = event.bootstrap_server + ... + + def _on_kafka_topic_created(self, event: TopicCreatedEvent): + # Event triggered when a topic was created for this application + username = event.username + password = event.password + tls = event.tls + tls_ca= event.tls_ca + bootstrap_server event.bootstrap_server + consumer_group_prefic = event.consumer_group_prefix + zookeeper_uris = event.zookeeper_uris + ... + + def _on_kafka_topic_entity_created(self, event: TopicEntityCreatedEvent): + # Event triggered when an entity was created for this application + ... +``` + +As shown above, the library provides some custom events to handle specific situations, +which are listed below: + +- topic_created: event emitted when the requested topic is created. +- bootstrap_server_changed: event emitted when the bootstrap server have changed. +- credential_changed: event emitted when the credentials of Kafka changed. + +### Provider Charm + +Following the previous example, this is an example of the provider charm. 
+ +```python +class SampleCharm(CharmBase): + +from charms.data_platform_libs.v0.data_interfaces import ( + KafkaProvides, + TopicRequestedEvent, +) + + def __init__(self, *args): + super().__init__(*args) + + # Default charm events. + self.framework.observe(self.on.start, self._on_start) + + # Charm events defined in the Kafka Provides charm library. + self.kafka_provider = KafkaProvides(self, relation_name="kafka_client") + self.framework.observe(self.kafka_provider.on.topic_requested, self._on_topic_requested) + self.framework.observe(self.kafka_provider.on.topic_entity_requested, self._on_entity_requested) + # Kafka generic helper + self.kafka = KafkaHelper() + + def _on_topic_requested(self, event: TopicRequestedEvent): + # Handle the on_topic_requested event. + + topic = event.topic + relation_id = event.relation.id + # set connection info in the databag relation + self.kafka_provider.set_bootstrap_server(relation_id, self.kafka.get_bootstrap_server()) + self.kafka_provider.set_credentials(relation_id, username=username, password=password) + self.kafka_provider.set_consumer_group_prefix(relation_id, ...) + self.kafka_provider.set_tls(relation_id, "False") + self.kafka_provider.set_zookeeper_uris(relation_id, ...) + + def _on_entity_requested(self, event: EntityRequestedEvent): + # Handle the on_topic_entity_requested event. + ... +``` +As shown above, the library provides a custom event (topic_requested) to handle +the situation when an application charm requests a new topic to be created. +It is preferred to subscribe to this event instead of relation changed event to avoid +creating a new topic when other information other than a topic name is +exchanged in the relation databag. + +### Karapace + +This library is the interface to use and interact with the Karapace charm. This library contains +custom events that add convenience to manage Karapace, and provides methods to consume the +application related data. 
+ +#### Requirer Charm + +```python + +from charms.data_platform_libs.v0.data_interfaces import ( + EndpointsChangedEvent, + KarapaceRequires, + SubjectAllowedEvent, +) + +class ApplicationCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + self.karapace = KarapaceRequires(self, relation_name="karapace_client", subject="test-subject") + self.framework.observe( + self.karapace.on.server_changed, self._on_karapace_server_changed + ) + self.framework.observe( + self.karapace.on.subject_allowed, self._on_karapace_subject_allowed + ) + self.framework.observe( + self.karapace.on.subject_entity_created, self._on_subject_entity_created + ) + + + def _on_karapace_server_changed(self, event: EndpointsChangedEvent): + # Event triggered when a server endpoint was changed for this application + new_server = event.endpoints + ... + + def _on_karapace_subject_allowed(self, event: SubjectAllowedEvent): + # Event triggered when a subject was allowed for this application + username = event.username + password = event.password + tls = event.tls + endpoints = event.endpoints + ... + + def _on_subject_entity_created(self, event: SubjectEntityCreatedEvent): + # Event triggered when a subject entity was created this application + entity_name = event.entity_name + entity_password = event.entity_password + ... +``` + +As shown above, the library provides some custom events to handle specific situations, +which are listed below: + +- subject_allowed: event emitted when the requested subject is allowed. +- server_changed: event emitted when the server endpoints have changed. + +#### Provider Charm + +Following the previous example, this is an example of the provider charm. + +```python +class SampleCharm(CharmBase): + +from charms.data_platform_libs.v0.data_interfaces import ( + KarapaceProvides, + SubjectRequestedEvent, +) + + def __init__(self, *args): + super().__init__(*args) + + # Default charm events. 
+ self.framework.observe(self.on.start, self._on_start) + + # Charm events defined in the Karapace Provides charm library. + self.karapace_provider = KarapaceProvides(self, relation_name="karapace_client") + self.framework.observe(self.karapace_provider.on.subject_requested, self._on_subject_requested) + # Karapace generic helper + self.karapace = KarapaceHelper() + + def _on_subject_requested(self, event: SubjectRequestedEvent): + # Handle the on_subject_requested event. + + subject = event.subject + relation_id = event.relation.id + # set connection info in the databag relation + self.karapace_provider.set_endpoint(relation_id, self.karapace.get_endpoint()) + self.karapace_provider.set_credentials(relation_id, username=username, password=password) + self.karapace_provider.set_tls(relation_id, "False") +``` + +As shown above, the library provides a custom event (subject_requested) to handle +the situation when an application charm requests a new subject to be created. +It is preferred to subscribe to this event instead of relation changed event to avoid +creating a new subject when other information other than a subject name is +exchanged in the relation databag. 
+""" + +import copy +import json +import logging +from abc import ABC, abstractmethod +from collections import UserDict, namedtuple +from dataclasses import asdict, dataclass +from datetime import datetime +from enum import Enum +from os import PathLike +from pathlib import Path +from typing import ( + Callable, + Dict, + Final, + ItemsView, + KeysView, + List, + Optional, + Set, + Tuple, + TypedDict, + Union, + ValuesView, + overload, +) + +from ops import JujuVersion, Model, Secret, SecretInfo, SecretNotFoundError +from ops.charm import ( + CharmBase, + CharmEvents, + RelationChangedEvent, + RelationCreatedEvent, + RelationEvent, + SecretChangedEvent, +) +from ops.framework import EventSource, Handle, Object +from ops.model import Application, ModelError, Relation, Unit + +# The unique Charmhub library identifier, never change it +LIBID = "6c3e6b6680d64e9c89e611d1a15f65be" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 58 + +PYDEPS = ["ops>=2.0.0"] + +# Starting from what LIBPATCH number to apply legacy solutions +# v0.17 was the last version without secrets +LEGACY_SUPPORT_FROM = 17 + +logger = logging.getLogger(__name__) + +Diff = namedtuple("Diff", "added changed deleted") +Diff.__doc__ = """ +A tuple for storing the diff between two data mappings. 
class SecretsIllegalUpdateError(SecretError):
    """An update of a secret was attempted that is not allowed.

    NOTE(review): meaning inferred from the class name; confirm against the
    places where this exception is raised.
    """
def get_encoded_dict(
    relation: Relation, member: Union[Unit, Application], field: str
) -> Optional[Dict[str, str]]:
    """Decode the JSON object stored under ``field`` in ``member``'s databag.

    A missing field is read as an empty JSON object. If the stored value decodes
    to anything other than a dict, an error is logged and ``None`` is returned.
    """
    raw_value = relation.data[member].get(field, "{}")
    decoded = json.loads(raw_value)
    if not isinstance(decoded, dict):
        logger.error("Unexpected datatype for %s instead of dict.", str(decoded))
        return None
    return decoded
def leader_only(f):
    """Decorator to ensure that only leader can perform given operation.

    The guard only applies when the decorated object's ``component`` is its
    ``local_app``; in that case a non-leader unit gets an error log entry and
    ``None`` instead of the wrapped call. In every other case the wrapped
    function is called and its result returned unchanged.

    The returned wrapper carries a ``leader_only = True`` attribute, which
    ``DataDict.__getitem__`` checks with ``hasattr()``.
    """
    # Function-scope import: keeps this block independent of the module import list.
    from functools import wraps

    @wraps(f)  # preserve __name__/__doc__ of the decorated method
    def wrapper(self, *args, **kwargs):
        if self.component == self.local_app and not self.local_unit.is_leader():
            logger.error(
                "This operation (%s()) can only be performed by the leader unit", f.__name__
            )
            return None
        return f(self, *args, **kwargs)

    # Set after wraps() so the marker is always present on the wrapper.
    wrapper.leader_only = True
    return wrapper
def either_static_or_dynamic_secrets(f):
    """Decorator rejecting calls that would mix static and dynamic secret fields.

    When the object defines static secret fields and its current secret fields
    contain anything outside of that static set, ``IllegalOperationError`` is
    raised; otherwise the wrapped function is called as-is.
    """

    def wrapper(self, *args, **kwargs):
        if self.static_secret_fields:
            # Fields currently in use that were not declared statically.
            undeclared = set(self.current_secret_fields) - set(self.static_secret_fields)
            if undeclared:
                raise IllegalOperationError(
                    "Unsafe usage of statically and dynamically defined secrets, aborting."
                )
        return f(self, *args, **kwargs)

    return wrapper
@dataclass
class RelationStatus:
    """Status record (code, message, resolution) propagated on charm relations.

    The thousands part of ``code`` selects the category: 1 is informational,
    4 is transitory and 5 is fatal.
    """

    code: int
    message: str
    resolution: str

    def _category(self) -> int:
        """Return the thousands part of the status code."""
        return self.code // 1000

    @property
    def is_informational(self) -> bool:
        """Is this an informational status?"""
        return self._category() == 1

    @property
    def is_transitory(self) -> bool:
        """Is this a transitory status?"""
        return self._category() == 4

    @property
    def is_fatal(self) -> bool:
        """Is this a fatal status, requiring removing the relation?"""
        return self._category() == 5
class CachedSecret:
    """Locally cache a secret.

    The data structure is precisely reusing/simulating as in the actual Secret Storage.
    Both the secret object (``meta``) and its content are fetched lazily and kept on
    the instance afterwards.
    """

    KNOWN_MODEL_ERRORS = [MODEL_ERRORS["no_label_and_uri"], MODEL_ERRORS["owner_no_refresh"]]

    def __init__(
        self,
        model: Model,
        component: Union[Application, Unit],
        label: str,
        secret_uri: Optional[str] = None,
        legacy_labels: Optional[List[str]] = None,
    ):
        """Set up an empty cache entry.

        Args:
            model: model used to look secrets up.
            component: application or unit on which new secrets are added.
            label: label the secret is looked up by (and registered with).
            secret_uri: optional secret id, used when the label lookup fails.
            legacy_labels: older labels to try when ``label`` is not found.
        """
        self._secret_meta = None
        self._secret_content = {}
        self._secret_uri = secret_uri
        self.label = label
        self._model = model
        self.component = component
        # A fresh list per instance: a literal ``[]`` default would be shared by all instances.
        self.legacy_labels = legacy_labels if legacy_labels is not None else []
        self.current_label = None

    @property
    def meta(self) -> Optional[Secret]:
        """Getting cached secret meta-information.

        Lookup order: by ``label``, then by legacy labels, then by URI (which also
        attaches ``label`` to the secret). Returns ``None`` when neither a URI nor
        a label is available, or when nothing was found.
        """
        if not self._secret_meta:
            if not (self._secret_uri or self.label):
                return None

            try:
                self._secret_meta = self._model.get_secret(label=self.label)
            except SecretNotFoundError:
                # Falling back to seeking for potential legacy labels
                self._legacy_compat_find_secret_by_old_label()

            # If still not found, to be checked by URI, to be labelled with the proposed label
            if not self._secret_meta and self._secret_uri:
                self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label)
        return self._secret_meta

    ##########################################################################
    # Backwards compatibility / Upgrades
    ##########################################################################
    # These functions are used to keep backwards compatibility on rolling upgrades
    # Policy:
    # All data is kept intact until the first write operation. (This allows a minimal
    # grace period during which rollbacks are fully safe. For more info see the spec.)
    # All data involves:
    #   - databag contents
    #   - secrets content
    #   - secret labels (!!!)
    # Legacy functions must return None, and leave an equally consistent state whether
    # they are executed or skipped (as a high enough versioned execution environment may
    # not require so)

    # Compatibility

    @legacy_apply_from_version(34)
    def _legacy_compat_find_secret_by_old_label(self) -> None:
        """Compatibility function, allowing to find a secret by a legacy label.

        This functionality is typically needed when secret labels changed over an upgrade.
        Until the first write operation, we need to maintain data as it was, including keeping
        the old secret label. In order to keep track of the old label currently used to access
        the secret, an additional 'current_label' field is being defined.
        """
        for label in self.legacy_labels:
            try:
                self._secret_meta = self._model.get_secret(label=label)
            except SecretNotFoundError:
                pass
            except ModelError as e:
                # Permission denied can be raised if the secret exists but is not yet granted to us.
                if "permission denied" in str(e):
                    return
                raise
            else:
                if label != self.label:
                    self.current_label = label
                return

    # Migrations

    @legacy_apply_from_version(34)
    def _legacy_migration_to_new_label_if_needed(self) -> None:
        """Helper function to re-create the secret with a different label.

        Juju does not provide a way to change secret labels.
        Thus whenever moving from secrets version that involves secret label changes,
        we "re-create" the existing secret, and attach the new label to the new
        secret, to be used from then on.

        Note: we replace the old secret with a new one "in place", as we can't
        easily switch the containing SecretCache structure to point to a new secret.
        Instead we are changing the 'self' (CachedSecret) object to point to the
        new instance.
        """
        if not self.current_label or not (self.meta and self._secret_meta):
            return

        # Create a new secret with the new label
        content = self._secret_meta.get_content()
        # Cleared first, since add_secret() refuses to run while a URI is set.
        self._secret_uri = None

        # It will be nice to have the possibility to check if we are the owners of the secret...
        try:
            self._secret_meta = self.add_secret(content, label=self.label)
        except ModelError as err:
            # NOTE(review): as written, the error is swallowed only when its text contains
            # BOTH messages below; if either one alone was meant to be tolerated, these two
            # checks need to be combined. Left unchanged pending confirmation.
            if MODEL_ERRORS["not_leader"] not in str(err):
                raise
            if "permission denied" not in str(err):
                raise
        self.current_label = None

    ##########################################################################
    # Public functions
    ##########################################################################

    def add_secret(
        self,
        content: Dict[str, str],
        relation: Optional[Relation] = None,
        label: Optional[str] = None,
    ) -> Secret:
        """Create a new secret.

        Args:
            content: key/value content of the new secret.
            relation: when given and its app is not the local app, the secret is
                granted to that relation.
            label: label for the new secret; defaults to ``self.label``.

        Returns:
            The newly created secret, which is also cached on this object.

        Raises:
            SecretAlreadyExistsError: if this object already refers to a secret URI.
        """
        if self._secret_uri:
            # Build the message eagerly: exceptions do not interpolate logging-style args.
            raise SecretAlreadyExistsError(
                f"Secret is already defined with uri {self._secret_uri}"
            )

        label = self.label if not label else label

        secret = self.component.add_secret(content, label=label)
        if relation and relation.app != self._model.app:
            # If it's not a peer relation, grant is to be applied
            secret.grant(relation)
        self._secret_uri = secret.id
        self._secret_meta = secret
        return self._secret_meta

    def get_content(self) -> Dict[str, str]:
        """Getting cached secret content.

        Content is fetched with ``refresh=True`` first; on ``ValueError`` or one of
        the ``KNOWN_MODEL_ERRORS`` it is fetched again without refresh. Any other
        ``ModelError`` is re-raised.
        """
        if not self._secret_content:
            if self.meta:
                try:
                    self._secret_content = self.meta.get_content(refresh=True)
                except (ValueError, ModelError) as err:
                    # https://bugs.launchpad.net/juju/+bug/2042596
                    # Only triggered when 'refresh' is set
                    if isinstance(err, ModelError) and not any(
                        msg in str(err) for msg in self.KNOWN_MODEL_ERRORS
                    ):
                        raise
                    # Due to: ValueError: Secret owner cannot use refresh=True
                    self._secret_content = self.meta.get_content()
        return self._secret_content

    def set_content(self, content: Dict[str, str]) -> None:
        """Setting cached secret content.

        Does nothing when there is no secret or when ``content`` equals the current
        content. Empty ``content`` removes all revisions of the secret instead.
        """
        if not self.meta:
            return

        # DPE-4182: do not create new revision if the content stay the same
        if content == self.get_content():
            return

        if content:
            # First write: move the secret over to the new label if it is still on a legacy one.
            self._legacy_migration_to_new_label_if_needed()
            self.meta.set_content(content)
            self._secret_content = content
        else:
            self.meta.remove_all_revisions()

    def get_info(self) -> Optional[SecretInfo]:
        """Wrapper function to apply the corresponding call on the Secret object within CachedSecret if any."""
        if self.meta:
            return self.meta.get_info()
        return None

    def remove(self) -> None:
        """Remove secret.

        Raises:
            SecretsUnavailableError: if there is no secret to remove.
        """
        if not self.meta:
            raise SecretsUnavailableError("Non-existent secret was attempted to be removed.")
        try:
            self.meta.remove_all_revisions()
        except SecretNotFoundError:
            # Already gone: only the local state below needs resetting.
            pass
        self._secret_content = {}
        self._secret_meta = None
        self._secret_uri = None
class DataDict(UserDict):
    """Python Standard Library 'dict' - like representation of Relation Data.

    Every operation is forwarded to the wrapped relation-data object for one
    relation id; nothing is stored on the dictionary itself.
    """

    def __init__(self, relation_data: "Data", relation_id: int):
        # NOTE: UserDict.__init__() is not called on purpose; it would try to assign
        # ``self.data``, which is a read-only property on this class.
        self.relation_data = relation_data
        self.relation_id = relation_id

    @property
    def data(self) -> Dict[str, str]:
        """Return the full content of the Abstract Relation Data dictionary.

        Remote data is fetched when the wrapped object implements it; local data
        is laid on top, so local values win on key clashes.
        """
        local = self.relation_data.fetch_my_relation_data([self.relation_id])
        try:
            remote = self.relation_data.fetch_relation_data([self.relation_id])
        except NotImplementedError:
            remote = {self.relation_id: {}}
        # Tolerate fetch results that lack an entry for this relation id.
        merged = dict((remote or {}).get(self.relation_id) or {})
        if local:
            merged.update(local.get(self.relation_id) or {})
        return merged

    def __setitem__(self, key: str, item: str) -> None:
        """Set an item of the Abstract Relation Data dictionary."""
        self.relation_data.update_relation_data(self.relation_id, {key: item})

    def __getitem__(self, key: str) -> str:
        """Get an item of the Abstract Relation Data dictionary.

        Local data is tried first, then remote data. Missing and empty values
        both raise ``KeyError``.
        """
        result = None

        # Avoiding "leader_only" error when cross-charm non-leader unit, not to report useless error
        if (
            not hasattr(self.relation_data.fetch_my_relation_field, "leader_only")
            or self.relation_data.component != self.relation_data.local_app
            or self.relation_data.local_unit.is_leader()
        ):
            result = self.relation_data.fetch_my_relation_field(self.relation_id, key)

        if not result:
            try:
                result = self.relation_data.fetch_relation_field(self.relation_id, key)
            except NotImplementedError:
                pass

        if not result:
            raise KeyError(key)
        return result

    def __eq__(self, d: dict) -> bool:
        """Equality."""
        return self.data == d

    def __repr__(self) -> str:
        """String representation Abstract Relation Data dictionary."""
        return repr(self.data)

    def __len__(self) -> int:
        """Length of the Abstract Relation Data dictionary."""
        return len(self.data)

    def __delitem__(self, key: str) -> None:
        """Delete an item of the Abstract Relation Data dictionary."""
        self.relation_data.delete_relation_data(self.relation_id, [key])

    def has_key(self, key: str) -> bool:
        """Does the key exist in the Abstract Relation Data dictionary?"""
        return key in self.data

    def update(self, items: Dict[str, str]):
        """Update the Abstract Relation Data dictionary."""
        self.relation_data.update_relation_data(self.relation_id, items)

    def keys(self) -> KeysView[str]:
        """Keys of the Abstract Relation Data dictionary."""
        return self.data.keys()

    def values(self) -> ValuesView[str]:
        """Values of the Abstract Relation Data dictionary."""
        return self.data.values()

    def items(self) -> ItemsView[str, str]:
        """Items of the Abstract Relation Data dictionary."""
        return self.data.items()

    def pop(self, item: str) -> str:
        """Pop an item of the Abstract Relation Data dictionary.

        Only local data is considered. Raises ``KeyError`` when the item is
        missing or empty.
        """
        result = self.relation_data.fetch_my_relation_field(self.relation_id, item)
        if not result:
            raise KeyError(f"Item {item} doesn't exist.")
        self.relation_data.delete_relation_data(self.relation_id, [item])
        return result

    def __contains__(self, item: str) -> bool:
        """Does the Abstract Relation Data dictionary contain the key ``item``?"""
        # Mapping semantics: ``in`` tests keys (consistent with has_key()), not values.
        return item in self.data

    def __iter__(self):
        """Iterate through the Abstract Relation Data dictionary."""
        return iter(self.data)

    def get(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """Safely get an item of the Abstract Relation Data dictionary."""
        # __getitem__ never returns a falsy value (it raises KeyError instead).
        try:
            return self[key]
        except KeyError:
            return default
self.secrets_enabled: + return self._remote_secret_fields + + @property + def my_secret_groups(self) -> Optional[List[SecretGroup]]: + """Local access to secrets field, in case they are being used.""" + if self.secrets_enabled: + return [ + self.SECRET_LABEL_MAP[field] + for field in self._local_secret_fields + if field in self.SECRET_LABEL_MAP + ] + + # Mandatory overrides for internal/helper methods + + @juju_secrets_only + def _get_relation_secret( + self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None + ) -> Optional[CachedSecret]: + """Retrieve a Juju Secret that's been stored in the relation databag.""" + if not relation_name: + relation_name = self.relation_name + + label = self._generate_secret_label(relation_name, relation_id, group_mapping) + if secret := self.secrets.get(label): + return secret + + relation = self._model.get_relation(relation_name, relation_id) + if not relation: + return + + if secret_uri := self.get_secret_uri(relation, group_mapping): + return self.secrets.get(label, secret_uri) + + # Mandatory overrides for requirer and peer, implemented for Provider + # Requirer uses local component and switched keys + # _local_secret_fields -> PROV_SECRET_FIELDS + # _remote_secret_fields -> REQ_SECRET_FIELDS + # provider uses remote component and + # _local_secret_fields -> REQ_SECRET_FIELDS + # _remote_secret_fields -> PROV_SECRET_FIELDS + @abstractmethod + def _load_secrets_from_databag(self, relation: Relation) -> None: + """Load secrets from the databag.""" + raise NotImplementedError + + def _fetch_specific_relation_data( + self, relation: Relation, fields: Optional[List[str]] + ) -> Dict[str, str]: + """Fetch data available (directily or indirectly -- i.e. 
secrets) from the relation (remote app data).""" + if not relation.app: + return {} + self._load_secrets_from_databag(relation) + return self._fetch_relation_data_with_secrets( + relation.app, self.remote_secret_fields, relation, fields + ) + + def _fetch_my_specific_relation_data( + self, relation: Relation, fields: Optional[List[str]] + ) -> dict: + """Fetch our own relation data.""" + # load secrets + self._load_secrets_from_databag(relation) + return self._fetch_relation_data_with_secrets( + self.local_app, + self.local_secret_fields, + relation, + fields, + ) + + def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: + """Set values for fields not caring whether it's a secret or not.""" + self._load_secrets_from_databag(relation) + + _, normal_fields = self._process_secret_fields( + relation, + self.local_secret_fields, + list(data), + self._add_or_update_relation_secrets, + data=data, + ) + + normal_content = {k: v for k, v in data.items() if k in normal_fields} + self._update_relation_data_without_secrets(self.local_app, relation, normal_content) + + def _add_or_update_relation_secrets( + self, + relation: Relation, + group: SecretGroup, + secret_fields: Set[str], + data: Dict[str, str], + uri_to_databag=True, + ) -> bool: + """Update contents for Secret group. 
If the Secret doesn't exist, create it.""" + if self._get_relation_secret(relation.id, group): + return self._update_relation_secret(relation, group, secret_fields, data) + + return self._add_relation_secret(relation, group, secret_fields, data, uri_to_databag) + + @juju_secrets_only + def _add_relation_secret( + self, + relation: Relation, + group_mapping: SecretGroup, + secret_fields: Set[str], + data: Dict[str, str], + uri_to_databag=True, + ) -> bool: + """Add a new Juju Secret that will be registered in the relation databag.""" + if uri_to_databag and self.get_secret_uri(relation, group_mapping): + logging.error("Secret for relation %s already exists, not adding again", relation.id) + return False + + content = self._content_for_secret_group(data, secret_fields, group_mapping) + + label = self._generate_secret_label(self.relation_name, relation.id, group_mapping) + secret = self.secrets.add(label, content, relation) + + if uri_to_databag: + # According to lint we may not have a Secret ID + if not secret.meta or not secret.meta.id: + logging.error("Secret is missing Secret ID") + raise SecretError("Secret added but is missing Secret ID") + + self.set_secret_uri(relation, group_mapping, secret.meta.id) + + # Return the content that was added + return True + + @juju_secrets_only + def _update_relation_secret( + self, + relation: Relation, + group_mapping: SecretGroup, + secret_fields: Set[str], + data: Dict[str, str], + ) -> bool: + """Update the contents of an existing Juju Secret, referred in the relation databag.""" + secret = self._get_relation_secret(relation.id, group_mapping) + + if not secret: + logging.error("Can't update secret for relation %s", relation.id) + return False + + content = self._content_for_secret_group(data, secret_fields, group_mapping) + + old_content = secret.get_content() + full_content = copy.deepcopy(old_content) + full_content.update(content) + secret.set_content(full_content) + + # Return True on success + return True + + 
@juju_secrets_only + def _delete_relation_secret( + self, relation: Relation, group: SecretGroup, secret_fields: List[str], fields: List[str] + ) -> bool: + """Update the contents of an existing Juju Secret, referred in the relation databag.""" + secret = self._get_relation_secret(relation.id, group) + + if not secret: + logging.error("Can't delete secret for relation %s", str(relation.id)) + return False + + old_content = secret.get_content() + new_content = copy.deepcopy(old_content) + for field in fields: + try: + new_content.pop(field) + except KeyError: + logging.debug( + "Non-existing secret was attempted to be removed %s, %s", + str(relation.id), + str(field), + ) + return False + + # Remove secret from the relation if it's fully gone + if not new_content: + field = self._generate_secret_field_name(group) + try: + relation.data[self.component].pop(field) + except KeyError: + pass + label = self._generate_secret_label(self.relation_name, relation.id, group) + self.secrets.remove(label) + else: + secret.set_content(new_content) + + # Return the content that was removed + return True + + def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: + """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" + if relation.app: + self._load_secrets_from_databag(relation) + + _, normal_fields = self._process_secret_fields( + relation, self.local_secret_fields, fields, self._delete_relation_secret, fields=fields + ) + self._delete_relation_data_without_secrets(self.local_app, relation, list(normal_fields)) + + def _register_secret_to_relation( + self, relation_name: str, relation_id: int, secret_id: str, group: SecretGroup + ): + """Fetch secrets and apply local label on them. + + [MAGIC HERE] + If we fetch a secret using get_secret(id=, label=), + then will be "stuck" on the Secret object, whenever it may + appear (i.e. as an event attribute, or fetched manually) on future occasions. 
+ + This will allow us to uniquely identify the secret on Provider side (typically on + 'secret-changed' events), and map it to the corresponding relation. + """ + label = self._generate_secret_label(relation_name, relation_id, group) + + # Fetching the Secret's meta information ensuring that it's locally getting registered with + CachedSecret(self._model, self.component, label, secret_id).meta + + def _register_secrets_to_relation(self, relation: Relation, params_name_list: List[str]): + """Make sure that secrets of the provided list are locally 'registered' from the databag. + + More on 'locally registered' magic is described in _register_secret_to_relation() method + """ + if not relation.app: + return + + for group in SECRET_GROUPS.groups(): + secret_field = self._generate_secret_field_name(group) + if secret_field in params_name_list and ( + secret_uri := self.get_secret_uri(relation, group) + ): + self._register_secret_to_relation(relation.name, relation.id, secret_uri, group) + + # Optional overrides + + def _legacy_apply_on_fetch(self) -> None: + """This function should provide a list of compatibility functions to be applied when fetching (legacy) data.""" + pass + + def _legacy_apply_on_update(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when writing data. + + Since data may be at a legacy version, migration may be mandatory. 
+ """ + pass + + def _legacy_apply_on_delete(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when deleting (legacy) data.""" + pass + + # Internal helper methods + + @staticmethod + def _is_secret_field(field: str) -> bool: + """Is the field in question a secret reference (URI) field or not?""" + return field.startswith(PROV_SECRET_PREFIX) + + @staticmethod + def _generate_secret_label( + relation_name: str, relation_id: int, group_mapping: SecretGroup + ) -> str: + """Generate unique group_mappings for secrets within a relation context.""" + return f"{relation_name}.{relation_id}.{group_mapping}.secret" + + def _generate_secret_field_name(self, group_mapping: SecretGroup) -> str: + """Generate unique group_mappings for secrets within a relation context.""" + return f"{PROV_SECRET_PREFIX}{group_mapping}" + + def _relation_from_secret_label(self, secret_label: str) -> Optional[Relation]: + """Retrieve the relation that belongs to a secret label.""" + contents = secret_label.split(".") + + if not (contents and len(contents) >= 3): + return + + contents.pop() # ".secret" at the end + contents.pop() # Group mapping + relation_id = contents.pop() + try: + relation_id = int(relation_id) + except ValueError: + return + + # In case '.' character appeared in relation name + relation_name = ".".join(contents) + + try: + return self.get_relation(relation_name, relation_id) + except ModelError: + return + + def _group_secret_fields(self, secret_fields: List[str]) -> Dict[SecretGroup, List[str]]: + """Helper function to arrange secret mappings under their group. + + NOTE: All unrecognized items end up in the 'extra' secret bucket. + Make sure only secret fields are passed! 
+ """ + secret_fieldnames_grouped = {} + for key in secret_fields: + if group := self.secret_label_map.get(key): + secret_fieldnames_grouped.setdefault(group, []).append(key) + else: + secret_fieldnames_grouped.setdefault(SECRET_GROUPS.EXTRA, []).append(key) + return secret_fieldnames_grouped + + def _get_group_secret_contents( + self, + relation: Relation, + group: SecretGroup, + secret_fields: Union[Set[str], List[str]] = [], + ) -> Dict[str, str]: + """Helper function to retrieve collective, requested contents of a secret.""" + if (secret := self._get_relation_secret(relation.id, group)) and ( + secret_data := secret.get_content() + ): + return { + k: v for k, v in secret_data.items() if not secret_fields or k in secret_fields + } + return {} + + def _content_for_secret_group( + self, content: Dict[str, str], secret_fields: Set[str], group_mapping: SecretGroup + ) -> Dict[str, str]: + """Select : pairs from input, that belong to this particular Secret group.""" + if group_mapping == SECRET_GROUPS.EXTRA: + return { + k: v + for k, v in content.items() + if k in secret_fields and k not in self.secret_label_map.keys() + } + + return { + k: v + for k, v in content.items() + if k in secret_fields and self.secret_label_map.get(k) == group_mapping + } + + @juju_secrets_only + def _get_relation_secret_data( + self, relation_id: int, group_mapping: SecretGroup, relation_name: Optional[str] = None + ) -> Optional[Dict[str, str]]: + """Retrieve contents of a Juju Secret that's been stored in the relation databag.""" + secret = self._get_relation_secret(relation_id, group_mapping, relation_name) + if secret: + return secret.get_content() + + # Core operations on Relation Fields manipulations (regardless whether the field is in the databag or in a secret) + # Internal functions to be called directly from transparent public interface functions (+closely related helpers) + + def _process_secret_fields( + self, + relation: Relation, + req_secret_fields: Optional[List[str]], + 
impacted_rel_fields: List[str], + operation: Callable, + *args, + **kwargs, + ) -> Tuple[Dict[str, str], Set[str]]: + """Isolate target secret fields of manipulation, and execute requested operation by Secret Group.""" + result = {} + + # If the relation started on a databag, we just stay on the databag + # (Rolling upgrades may result in a relation starting on databag, getting secrets enabled on-the-fly) + # self.local_app is sufficient to check (ignored if Requires, never has secrets -- works if Provider) + fallback_to_databag = ( + req_secret_fields + and (self.local_unit == self._model.unit and self.local_unit.is_leader()) + and set(req_secret_fields) & set(relation.data[self.component]) + ) + normal_fields = set(impacted_rel_fields) + if req_secret_fields and self.secrets_enabled and not fallback_to_databag: + normal_fields = normal_fields - set(req_secret_fields) + secret_fields = set(impacted_rel_fields) - set(normal_fields) + + secret_fieldnames_grouped = self._group_secret_fields(list(secret_fields)) + + for group in secret_fieldnames_grouped: + # operation() should return nothing when all goes well + if group_result := operation(relation, group, secret_fields, *args, **kwargs): + # If "meaningful" data was returned, we take it. (Some 'operation'-s only return success/failure.) + if isinstance(group_result, dict): + result.update(group_result) + else: + # If it wasn't found as a secret, let's give it a 2nd chance as "normal" field + # Needed when Juju3 Requires meets Juju2 Provider + normal_fields |= set(secret_fieldnames_grouped[group]) + return (result, normal_fields) + + def _fetch_relation_data_without_secrets( + self, component: Union[Application, Unit], relation: Relation, fields: Optional[List[str]] + ) -> Dict[str, str]: + """Fetching databag contents when no secrets are involved. + + Since the Provider's databag is the only one holding secrest, we can apply + a simplified workflow to read the Require's side's databag. 
+ This is used typically when the Provider side wants to read the Requires side's data, + or when the Requires side may want to read its own data. + """ + if component not in relation.data or not relation.data[component]: + return {} + + if fields: + return { + k: relation.data[component][k] for k in fields if k in relation.data[component] + } + else: + return dict(relation.data[component]) + + def _fetch_relation_data_with_secrets( + self, + component: Union[Application, Unit], + req_secret_fields: Optional[List[str]], + relation: Relation, + fields: Optional[List[str]] = None, + ) -> Dict[str, str]: + """Fetching databag contents when secrets may be involved. + + This function has internal logic to resolve if a requested field may be "hidden" + within a Relation Secret, or directly available as a databag field. Typically + used to read the Provider side's databag (eigher by the Requires side, or by + Provider side itself). + """ + result = {} + normal_fields = [] + + if not fields: + if component not in relation.data: + return {} + + all_fields = list(relation.data[component].keys()) + normal_fields = [field for field in all_fields if not self._is_secret_field(field)] + fields = normal_fields + req_secret_fields if req_secret_fields else normal_fields + + if fields: + result, normal_fields = self._process_secret_fields( + relation, req_secret_fields, fields, self._get_group_secret_contents + ) + + # Processing "normal" fields. May include leftover from what we couldn't retrieve as a secret. 
+ # (Typically when Juju3 Requires meets Juju2 Provider) + if normal_fields: + result.update( + self._fetch_relation_data_without_secrets(component, relation, list(normal_fields)) + ) + return result + + def _update_relation_data_without_secrets( + self, component: Union[Application, Unit], relation: Relation, data: Dict[str, str] + ) -> None: + """Updating databag contents when no secrets are involved.""" + if component not in relation.data or relation.data[component] is None: + return + + if relation: + relation.data[component].update(data) + + def _delete_relation_data_without_secrets( + self, component: Union[Application, Unit], relation: Relation, fields: List[str] + ) -> None: + """Remove databag fields 'fields' from Relation.""" + if component not in relation.data or relation.data[component] is None: + return + + for field in fields: + try: + relation.data[component].pop(field) + except KeyError: + logger.debug( + "Non-existing field '%s' was attempted to be removed from the databag (relation ID: %s)", + str(field), + str(relation.id), + ) + pass + + # Public interface methods + # Handling Relation Fields seamlessly, regardless if in databag or a Juju Secret + + def as_dict(self, relation_id: int) -> UserDict: + """Dict behavior representation of the Abstract Data.""" + return DataDict(self, relation_id) + + def get_relation(self, relation_name, relation_id) -> Relation: + """Safe way of retrieving a relation.""" + relation = self._model.get_relation(relation_name, relation_id) + + if not relation: + raise DataInterfacesError( + "Relation %s %s couldn't be retrieved", relation_name, relation_id + ) + + return relation + + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Get the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + # if the secret is not managed by this component, + # we need to fetch it from the other side + + # Fix for the linter + if 
self.my_secret_groups is None: + raise DataInterfacesError("Secrets are not enabled for this component") + component = self.component if group in self.my_secret_groups else relation.app + return relation.data[component].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, secret_uri: str) -> None: + """Set the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + relation.data[self.component][secret_field] = secret_uri + + def fetch_relation_data( + self, + relation_ids: Optional[List[int]] = None, + fields: Optional[List[str]] = None, + relation_name: Optional[str] = None, + ) -> Dict[int, Dict[str, str]]: + """Retrieves data from relation. + + This function can be used to retrieve data from a relation + in the charm code when outside an event callback. + Function cannot be used in `*-relation-broken` events and will raise an exception. + + Returns: + a dict of the values stored in the relation data bag + for all relation instances (indexed by the relation ID). 
+ """ + self._legacy_apply_on_fetch() + + if not relation_name: + relation_name = self.relation_name + + relations = [] + if relation_ids: + relations = [ + self.get_relation(relation_name, relation_id) for relation_id in relation_ids + ] + else: + relations = self.relations + + data = {} + for relation in relations: + if not relation_ids or (relation_ids and relation.id in relation_ids): + data[relation.id] = self._fetch_specific_relation_data(relation, fields) + return data + + def fetch_relation_field( + self, relation_id: int, field: str, relation_name: Optional[str] = None + ) -> Optional[str]: + """Get a single field from the relation data.""" + return ( + self.fetch_relation_data([relation_id], [field], relation_name) + .get(relation_id, {}) + .get(field) + ) + + def fetch_my_relation_data( + self, + relation_ids: Optional[List[int]] = None, + fields: Optional[List[str]] = None, + relation_name: Optional[str] = None, + ) -> Optional[Dict[int, Dict[str, str]]]: + """Fetch data of the 'owner' (or 'this app') side of the relation. + + NOTE: Since only the leader can read the relation's 'this_app'-side + Application databag, the functionality is limited to leaders + """ + self._legacy_apply_on_fetch() + + if not relation_name: + relation_name = self.relation_name + + relations = [] + if relation_ids: + relations = [ + self.get_relation(relation_name, relation_id) for relation_id in relation_ids + ] + else: + relations = self.relations + + data = {} + for relation in relations: + if not relation_ids or relation.id in relation_ids: + data[relation.id] = self._fetch_my_specific_relation_data(relation, fields) + return data + + def fetch_my_relation_field( + self, relation_id: int, field: str, relation_name: Optional[str] = None + ) -> Optional[str]: + """Get a single field from the relation data -- owner side. 
+ + NOTE: Since only the leader can read the relation's 'this_app'-side + Application databag, the functionality is limited to leaders + """ + if relation_data := self.fetch_my_relation_data([relation_id], [field], relation_name): + return relation_data.get(relation_id, {}).get(field) + + @leader_only + def update_relation_data(self, relation_id: int, data: dict) -> None: + """Update the data within the relation.""" + self._legacy_apply_on_update(list(data.keys())) + + relation_name = self.relation_name + relation = self.get_relation(relation_name, relation_id) + return self._update_relation_data(relation, data) + + @leader_only + def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: + """Remove field from the relation.""" + self._legacy_apply_on_delete(fields) + + relation_name = self.relation_name + relation = self.get_relation(relation_name, relation_id) + return self._delete_relation_data(relation, fields) + + +class EventHandlers(Object): + """Requires-side of the relation.""" + + def __init__(self, charm: CharmBase, relation_data: Data, unique_key: str = ""): + """Manager of base client relations.""" + if not unique_key: + unique_key = relation_data.relation_name + super().__init__(charm, unique_key) + + self.charm = charm + self.relation_data = relation_data + + self.framework.observe( + charm.on[self.relation_data.relation_name].relation_changed, + self._on_relation_changed_event, + ) + + self.framework.observe( + self.charm.on[relation_data.relation_name].relation_created, + self._on_relation_created_event, + ) + + self.framework.observe( + charm.on.secret_changed, + self._on_secret_changed_event, + ) + + # Event handlers + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the relation is created.""" + pass + + @abstractmethod + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation data has changed.""" + raise 
NotImplementedError + + @abstractmethod + def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: + """Event emitted when the relation data has changed.""" + raise NotImplementedError + + def _diff(self, event: RelationChangedEvent) -> Diff: + """Retrieves the diff of the data in the relation changed databag. + + Args: + event: relation changed event. + + Returns: + a Diff instance containing the added, deleted and changed + keys from the event relation databag. + """ + return diff(event, self.relation_data.data_component) + + +# Base ProviderData and RequiresData + + +class ProviderData(Data): + """Base provides-side of the data products relation.""" + + RESOURCE_FIELD = "database" + + def __init__( + self, + model: Model, + relation_name: str, + status_schema_path: OptionalPathLike = None, + ) -> None: + super().__init__(model, relation_name) + self.data_component = self.local_app + self._local_secret_fields = [] + self._remote_secret_fields = list(self.SECRET_FIELDS) + self._status_schema = ( + {} if not status_schema_path else self._load_status_schema(Path(status_schema_path)) + ) + + def _load_status_schema(self, schema_path: Path) -> Dict[int, RelationStatus]: + """Load JSON schema defining status codes and their details. + + Args: + schema_path: JSON schema file path. + + Raises: + FileNotFoundError: If the provided path is invalid/inaccessible. + + Returns: + dict[int, RelationStatusDict]: Mapping of status code to RelationStatus data objects. 
+ """ + if not schema_path.exists(): + raise FileNotFoundError(f"Can't locate status schema file: {schema_path}") + + content = json.load(open(schema_path, "r")) + + return {s["code"]: RelationStatus(**s) for s in content.get("statuses", [])} + + def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: + """Set values for fields not caring whether it's a secret or not.""" + keys = set(data.keys()) + if self.fetch_relation_field(relation.id, self.RESOURCE_FIELD) is None and ( + keys - {"endpoints", "read-only-endpoints", "replset"} + ): + raise PrematureDataAccessError( + "Premature access to relation data, update is forbidden before the connection is initialized." + ) + super()._update_relation_data(relation, data) + + # Public methods - "native" + + def set_credentials(self, relation_id: int, username: str, password: str) -> None: + """Set credentials. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + username: user that was created. + password: password of the created user. + """ + self.update_relation_data(relation_id, {"username": username, "password": password}) + + def set_entity_credentials( + self, relation_id: int, entity_name: str, entity_password: Optional[str] = None + ) -> None: + """Set entity credentials. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + entity_name: name of the created entity + entity_password: password of the created entity. + """ + self.update_relation_data( + relation_id, + {"entity-name": entity_name, "entity-password": entity_password}, + ) + + def set_tls(self, relation_id: int, tls: str) -> None: + """Set whether TLS is enabled. + + Args: + relation_id: the identifier for a particular relation. + tls: whether tls is enabled (True or False). 
+ """ + self.update_relation_data(relation_id, {"tls": tls}) + + def set_tls_ca(self, relation_id: int, tls_ca: str) -> None: + """Set the TLS CA in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + tls_ca: TLS certification authority. + """ + self.update_relation_data(relation_id, {"tls-ca": tls_ca}) + + @leader_only + def get_statuses(self, relation_id: int) -> Dict[int, RelationStatus]: + """Return all currently active statuses on this relation. Can only be called on leader units. + + Args: + relation_id (int): the identifier for a particular relation. + + Returns: + Dict[int, RelationStatus]: A mapping of status code to RelationStatus instances. + """ + raw = self.fetch_my_relation_field(relation_id, STATUS_FIELD) or "[]" + + return {item["code"]: RelationStatus(**item) for item in json.loads(raw)} + + @overload + def raise_status(self, relation_id: int, status: int) -> None: ... + + @overload + def raise_status(self, relation_id: int, status: RelationStatusDict) -> None: ... + + @overload + def raise_status(self, relation_id: int, status: RelationStatus) -> None: ... + + def raise_status( + self, relation_id: int, status: Union[RelationStatus, RelationStatusDict, int] + ) -> None: + """Raise a status on the relation. Can only be called on leader units. + + Args: + relation_id (int): the identifier for a particular relation. + status (RelationStatus | RelationStatusDict | int): A representation of the status being raised, + which could be either a RelationStatus, an appropriate dict, or the numeric status code. + + Raises: + ValueError: If the status provided is not correctly formatted. + """ + if isinstance(status, int): + # we expect the status schema to be defined in this case. 
+ if status not in self._status_schema: + raise KeyError(f"Status code [{status}] not defined.") + _status = self._status_schema[status] + elif isinstance(status, dict): + _status = RelationStatus(**status) + elif isinstance(status, RelationStatus): + _status = status + else: + raise ValueError( + "The status should be either a RelationStatus, an appropriate dict, or the numeric status code." + ) + + statuses = self.get_statuses(relation_id) + statuses.update({_status.code: _status}) + serialized = json.dumps([asdict(statuses[k]) for k in sorted(statuses)]) + self.update_relation_data(relation_id, {STATUS_FIELD: serialized}) + + def resolve_status(self, relation_id: int, status_code: int) -> None: + """Set a previously raised status as resolved. + + Args: + relation_id (int): the identifier for a particular relation. + status_code (int): the numeric code of the resolved status. + """ + statuses = self.get_statuses(relation_id) + if status_code not in statuses: + logger.error(f"Status [{status_code}] has never been raised before.") + return + + statuses.pop(status_code) + serialized = json.dumps([asdict(statuses[k]) for k in sorted(statuses)]) + self.update_relation_data(relation_id, {STATUS_FIELD: serialized}) + + def clear_statuses(self, relation_id: int) -> None: + """Clear all previously raised statuses. + + Args: + relation_id (int): the identifier for a particular relation. 
+ """ + self.delete_relation_data(relation_id, [STATUS_FIELD]) + + # Public functions -- inherited + + fetch_my_relation_data = leader_only(Data.fetch_my_relation_data) + fetch_my_relation_field = leader_only(Data.fetch_my_relation_field) + + def _load_secrets_from_databag(self, relation: Relation) -> None: + """Load secrets from the databag.""" + requested_secrets = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) + provided_secrets = get_encoded_list(relation, relation.app, PROV_SECRET_FIELDS) + if requested_secrets is not None: + self._local_secret_fields = requested_secrets + + if provided_secrets is not None: + self._remote_secret_fields = provided_secrets + + +class RequirerData(Data): + """Requirer-side of the relation.""" + + SECRET_FIELDS = [ + "username", + "password", + "tls", + "tls-ca", + "uris", + "read-only-uris", + "entity-name", + "entity-password", + ] + + def __init__( + self, + model, + relation_name: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + requested_entity_secret: Optional[str] = None, + requested_entity_name: Optional[str] = None, + requested_entity_password: Optional[str] = None, + prefix_matching: Optional[str] = None, + ): + """Manager of base client relations.""" + super().__init__(model, relation_name) + self.extra_user_roles = extra_user_roles + self.extra_group_roles = extra_group_roles + self.entity_type = entity_type + self.entity_permissions = entity_permissions + self.requested_entity_secret = requested_entity_secret + self.requested_entity_name = requested_entity_name + self.requested_entity_password = requested_entity_password + self.prefix_matching = prefix_matching + + if ( + self.requested_entity_secret or self.requested_entity_name + ) and not self.secrets_enabled: + raise SecretsUnavailableError("Secrets unavailable on current Juju 
version") + + if self.requested_entity_secret and ( + self.requested_entity_name or self.requested_entity_password + ): + raise IllegalOperationError("Unable to use provided and automated entity name secret") + + if self.requested_entity_password and not self.requested_entity_name: + raise IllegalOperationError("Unable to set entity password without an entity name") + + self._validate_entity_type() + self._validate_entity_permissions() + + self._remote_secret_fields = list(self.SECRET_FIELDS) + self._local_secret_fields = [ + field + for field in self.SECRET_LABEL_MAP.keys() + if field not in self._remote_secret_fields + ] + if additional_secret_fields: + self._remote_secret_fields += additional_secret_fields + self.data_component = self.local_unit + + # Internal functions + + def _is_resource_created_for_relation(self, relation: Relation) -> bool: + if not relation.app: + return False + + data = self.fetch_relation_data( + [relation.id], + ["username", "password", "entity-name", "entity-password"], + ).get(relation.id, {}) + + return any( + [ + all(bool(data.get(field)) for field in ("username", "password")), + all(bool(data.get(field)) for field in ("entity-name",)), + ] + ) + + def _validate_entity_type(self) -> None: + """Validates the consistency of the provided entity-type and its extra roles.""" + if self.entity_type and self.entity_type not in {ENTITY_USER, ENTITY_GROUP}: + raise ValueError("Invalid entity-type. Possible values are USER and GROUP") + + if self.entity_type == ENTITY_USER and self.extra_group_roles: + raise ValueError("Inconsistent entity information. Use extra_user_roles instead") + + if self.entity_type == ENTITY_GROUP and self.extra_user_roles: + raise ValueError("Inconsistent entity information. 
Use extra_group_roles instead") + + def _validate_entity_permissions(self) -> None: + """Validates whether the provided entity permissions follow the right JSON format.""" + if not self.entity_permissions: + return + + accepted_keys = {"resource_name", "resource_type", "privileges"} + + try: + permissions = json.loads(self.entity_permissions) + for permission in permissions: + if permission.keys() != accepted_keys: + raise ValueError("Invalid entity permissions format. See accepted keys") + except json.decoder.JSONDecodeError: + raise ValueError("Invalid entity permissions format. It must be JSON format") + + # Public functions + + def is_resource_created(self, relation_id: Optional[int] = None) -> bool: + """Check if the resource has been created. + + This function can be used to check if the Provider answered with data in the charm code + when outside an event callback. + + Args: + relation_id (int, optional): When provided the check is done only for the relation id + provided, otherwise the check is done for all relations + + Returns: + True or False + + Raises: + IndexError: If relation_id is provided but that relation does not exist + """ + if relation_id is not None: + try: + relation = [relation for relation in self.relations if relation.id == relation_id][ + 0 + ] + return self._is_resource_created_for_relation(relation) + except IndexError: + raise IndexError(f"relation id {relation_id} cannot be accessed") + else: + return ( + all( + self._is_resource_created_for_relation(relation) for relation in self.relations + ) + if self.relations + else False + ) + + # Public functions -- inherited + + fetch_my_relation_data = leader_only(Data.fetch_my_relation_data) + fetch_my_relation_field = leader_only(Data.fetch_my_relation_field) + + def _load_secrets_from_databag(self, relation: Relation) -> None: + """Load secrets from the databag.""" + requested_secrets = get_encoded_list(relation, self.local_unit, REQ_SECRET_FIELDS) + provided_secrets = 
get_encoded_list(relation, self.local_unit, PROV_SECRET_FIELDS) + if requested_secrets: + self._remote_secret_fields = requested_secrets + + if provided_secrets: + self._local_secret_fields = provided_secrets + + +class StatusEventBase(RelationEvent): + """Base class for relation status change events.""" + + def __init__( + self, + handle: Handle, + relation: Relation, + status: RelationStatus, + app: Optional[Application] = None, + unit: Optional[Unit] = None, + ): + super().__init__(handle, relation, app=app, unit=unit) + self.status = status + + def snapshot(self) -> dict: + """Return a snapshot of the event.""" + return super().snapshot() | {"status": json.dumps(asdict(self.status))} + + def restore(self, snapshot: dict): + """Restore the event from a snapshot.""" + super().restore(snapshot) + self.status = RelationStatus(**json.loads(snapshot["status"])) + + @property + def active_statuses(self) -> List[RelationStatus]: + """Returns a list of all currently active statuses on this relation.""" + if not self.relation.app: + return [] + + raw = json.loads(self.relation.data[self.relation.app].get(STATUS_FIELD, "[]")) + + return [RelationStatus(**item) for item in raw] + + +class StatusRaisedEvent(StatusEventBase): + """Event emitted on the requirer when a new status is being raised by the provider on relation.""" + + +class StatusResolvedEvent(StatusEventBase): + """Event emitted on the requirer when a status is marked as resolved by the provider on relation.""" + + +class RequirerCharmEvents(CharmEvents): + """Base events for data requirer charms.""" + + status_raised = EventSource(StatusRaisedEvent) + status_resolved = EventSource(StatusResolvedEvent) + + +class RequirerEventHandlers(EventHandlers): + """Requires-side of the relation.""" + + def __init__(self, charm: CharmBase, relation_data: RequirerData, unique_key: str = ""): + """Manager of base client relations.""" + super().__init__(charm, relation_data, unique_key) + + def _main_credentials_shared(self, 
diff: Diff) -> bool: + """Whether the relation data-bag contains username / password keys.""" + user_secret = self.relation_data._generate_secret_field_name(SECRET_GROUPS.USER) + return any( + [ + user_secret in diff.added, + "username" in diff.added and "password" in diff.added, + ] + ) + + def _entity_credentials_shared(self, diff: Diff) -> bool: + """Whether the relation data-bag contains rolename / password keys.""" + entity_secret = self.relation_data._generate_secret_field_name(SECRET_GROUPS.ENTITY) + return any( + [ + entity_secret in diff.added, + "entity-name" in diff.added, + ] + ) + + # Event handlers + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the relation is created.""" + if not self.relation_data.local_unit.is_leader(): + return + + if self.relation_data.remote_secret_fields: + if self.relation_data.SCOPE == Scope.APP: + set_encoded_field( + event.relation, + self.relation_data.local_app, + REQ_SECRET_FIELDS, + self.relation_data.remote_secret_fields, + ) + + set_encoded_field( + event.relation, + self.relation_data.local_unit, + REQ_SECRET_FIELDS, + self.relation_data.remote_secret_fields, + ) + + if self.relation_data.local_secret_fields: + if self.relation_data.SCOPE == Scope.APP: + set_encoded_field( + event.relation, + self.relation_data.local_app, + PROV_SECRET_FIELDS, + self.relation_data.local_secret_fields, + ) + set_encoded_field( + event.relation, + self.relation_data.local_unit, + PROV_SECRET_FIELDS, + self.relation_data.local_secret_fields, + ) + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + # Retrieve old statuses from "data" + old_data = get_encoded_dict(event.relation, self.relation_data.local_unit, "data") or {} + old_statuses = json.loads(old_data.get(STATUS_FIELD, "[]")) + previous_codes = {status.get("code") for status in old_statuses} + + # Compute current statuses + current_statuses = 
json.loads( + self.relation_data.fetch_relation_field(event.relation.id, STATUS_FIELD) or "[]" + ) + current_codes = {status.get("code") for status in current_statuses} + + # Detect changes + raised = current_codes - previous_codes + resolved = previous_codes - current_codes + + for status_code in raised: + logger.debug(f"Status [{status_code}] raised") + _status = next(s for s in current_statuses if s["code"] == status_code) + _status_instance = RelationStatus(**_status) + getattr(self.on, "status_raised").emit( + event.relation, + status=_status_instance, + app=event.app, + unit=event.unit, + ) + + for status_code in resolved: + logger.debug(f"Status [{status_code}] resolved") + _status = next(s for s in old_statuses if s["code"] == status_code) + _status_instance = RelationStatus(**_status) + getattr(self.on, "status_resolved").emit( + event.relation, + status=_status_instance, + app=event.app, + unit=event.unit, + ) + + +class ProviderEventHandlers(EventHandlers): + """Provider-side of the relation.""" + + def __init__(self, charm: CharmBase, relation_data: ProviderData, unique_key: str = ""): + """Manager of base client relations.""" + super().__init__(charm, relation_data, unique_key) + + @staticmethod + def _validate_entity_consistency(event: RelationEvent, diff: Diff) -> None: + """Validates that entity information is not changed after relation is established. + + - When entity-type changes, backwards compatibility is broken. + - When extra-user-roles changes, role membership checks become incredibly complex. + - When extra-group-roles changes, role membership checks become incredibly complex. 
+ """ + if not isinstance(event, RelationChangedEvent): + return + + for key in ["entity-type", "extra-user-roles", "extra-group-roles"]: + if key in diff.changed: + raise ValueError(f"Cannot change {key} after relation has already been created") + + # Event handlers + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation data has changed.""" + requested_secrets = get_encoded_list(event.relation, event.relation.app, REQ_SECRET_FIELDS) + provided_secrets = get_encoded_list(event.relation, event.relation.app, PROV_SECRET_FIELDS) + if requested_secrets is not None: + self.relation_data._local_secret_fields = requested_secrets + + if provided_secrets is not None: + self.relation_data._remote_secret_fields = provided_secrets + + +################################################################################ +# Peer Relation Data +################################################################################ + + +class DataPeerData(RequirerData, ProviderData): + """Represents peer relations data.""" + + SECRET_FIELDS = [] + SECRET_FIELD_NAME = "internal_secret" + SECRET_LABEL_MAP = {} + + def __init__( + self, + model, + relation_name: str, + additional_secret_fields: Optional[List[str]] = [], + additional_secret_group_mapping: Dict[str, str] = {}, + secret_field_name: Optional[str] = None, + deleted_label: Optional[str] = None, + ): + RequirerData.__init__( + self, + model=model, + relation_name=relation_name, + additional_secret_fields=additional_secret_fields, + ) + self.secret_field_name = secret_field_name if secret_field_name else self.SECRET_FIELD_NAME + self.deleted_label = deleted_label + self._secret_label_map = {} + + # Legacy information holders + self._legacy_labels = [] + self._legacy_secret_uri = None + + # Secrets that are being dynamically added within the scope of this event handler run + self._new_secrets = [] + self._additional_secret_group_mapping = additional_secret_group_mapping + + 
for group, fields in additional_secret_group_mapping.items(): + if group not in SECRET_GROUPS.groups(): + setattr(SECRET_GROUPS, group, group) + for field in fields: + secret_group = SECRET_GROUPS.get_group(group) + internal_field = self._field_to_internal_name(field, secret_group) + self._secret_label_map.setdefault(group, []).append(internal_field) + self._remote_secret_fields.append(internal_field) + + @property + def scope(self) -> Optional[Scope]: + """Turn component information into Scope.""" + if isinstance(self.component, Application): + return Scope.APP + if isinstance(self.component, Unit): + return Scope.UNIT + + @property + def secret_label_map(self) -> Dict[str, str]: + """Property storing secret mappings.""" + return self._secret_label_map + + @property + def static_secret_fields(self) -> List[str]: + """Re-definition of the property in a way that dynamically extended list is retrieved.""" + return self._remote_secret_fields + + @property + def local_secret_fields(self) -> List[str]: + """Re-definition of the property in a way that dynamically extended list is retrieved.""" + return ( + self.static_secret_fields if self.static_secret_fields else self.current_secret_fields + ) + + @property + def current_secret_fields(self) -> List[str]: + """Helper method to get all currently existing secret fields (added statically or dynamically).""" + if not self.secrets_enabled: + return [] + + if len(self._model.relations[self.relation_name]) > 1: + raise ValueError(f"More than one peer relation on {self.relation_name}") + + relation = self._model.relations[self.relation_name][0] + fields = [] + + ignores = [ + SECRET_GROUPS.get_group("user"), + SECRET_GROUPS.get_group("tls"), + SECRET_GROUPS.get_group("mtls"), + SECRET_GROUPS.get_group("entity"), + ] + for group in SECRET_GROUPS.groups(): + if group in ignores: + continue + if content := self._get_group_secret_contents(relation, group): + fields += list(content.keys()) + return list(set(fields) | 
set(self._new_secrets)) + @dynamic_secrets_only + def set_secret( + self, + relation_id: int, + field: str, + value: str, + group_mapping: Optional[SecretGroup] = None, + ) -> None: + """Public interface method to add a Relation Data field specifically as a Juju Secret. + + Args: + relation_id: ID of the relation + field: The secret field that is to be added + value: The string value of the secret + group_mapping: The name of the "secret group", in case the field is to be added to an existing secret + """ + self._legacy_apply_on_update([field]) + + full_field = self._field_to_internal_name(field, group_mapping) + if self.secrets_enabled and full_field not in self.current_secret_fields: + self._new_secrets.append(full_field) + if self.valid_field_pattern(field, full_field): + self.update_relation_data(relation_id, {full_field: value}) + + # Unlike for set_secret(), there's no harm using this operation with static secrets + # The restriction is only added to keep the concept clear + @dynamic_secrets_only + def get_secret( + self, + relation_id: int, + field: str, + group_mapping: Optional[SecretGroup] = None, + ) -> Optional[str]: + """Public interface method to fetch secrets only.""" + self._legacy_apply_on_fetch() + + full_field = self._field_to_internal_name(field, group_mapping) + if ( + self.secrets_enabled + and full_field not in self.current_secret_fields + and field not in self.current_secret_fields + ): + return + if self.valid_field_pattern(field, full_field): + return self.fetch_my_relation_field(relation_id, full_field) + + @dynamic_secrets_only + def delete_secret( + self, + relation_id: int, + field: str, + group_mapping: Optional[SecretGroup] = None, + ) -> Optional[str]: + """Public interface method to delete secrets only.""" + self._legacy_apply_on_delete([field]) + + full_field = self._field_to_internal_name(field, group_mapping) + if self.secrets_enabled and full_field not in self.current_secret_fields: + logger.warning(f"Secret {field} from group
{group_mapping} was not found") + return + + if self.valid_field_pattern(field, full_field): + self.delete_relation_data(relation_id, [full_field]) + + ########################################################################## + # Helpers + ########################################################################## + + @staticmethod + def _field_to_internal_name(field: str, group: Optional[SecretGroup]) -> str: + if not group or group == SECRET_GROUPS.EXTRA: + return field + return f"{field}{GROUP_SEPARATOR}{group}" + + @staticmethod + def _internal_name_to_field(name: str) -> Tuple[str, SecretGroup]: + parts = name.split(GROUP_SEPARATOR) + if not len(parts) > 1: + return (parts[0], SECRET_GROUPS.EXTRA) + secret_group = SECRET_GROUPS.get_group(parts[1]) + if not secret_group: + raise ValueError(f"Invalid secret field {name}") + return (parts[0], secret_group) + + def _group_secret_fields(self, secret_fields: List[str]) -> Dict[SecretGroup, List[str]]: + """Helper function to arrange secret mappings under their group. + + NOTE: All unrecognized items end up in the 'extra' secret bucket. + Make sure only secret fields are passed! + """ + secret_fieldnames_grouped = {} + for key in secret_fields: + field, group = self._internal_name_to_field(key) + secret_fieldnames_grouped.setdefault(group, []).append(field) + return secret_fieldnames_grouped + + def _content_for_secret_group( + self, content: Dict[str, str], secret_fields: Set[str], group_mapping: SecretGroup + ) -> Dict[str, str]: + """Select <field>: <value> pairs from input, that belong to this particular Secret group.""" + if group_mapping == SECRET_GROUPS.EXTRA: + return {k: v for k, v in content.items() if k in self.local_secret_fields} + return { + self._internal_name_to_field(k)[0]: v + for k, v in content.items() + if k in self.local_secret_fields + } + + def valid_field_pattern(self, field: str, full_field: str) -> bool: + """Check that no secret group is attempted to be used together without secrets being enabled.
+ + Secrets groups are impossible to use with versions that are not yet supporting secrets. + """ + if not self.secrets_enabled and full_field != field: + logger.error( + f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." + ) + return False + return True + + def _load_secrets_from_databag(self, relation: Relation) -> None: + """Load secrets from the databag.""" + requested_secrets = get_encoded_list(relation, self.component, REQ_SECRET_FIELDS) + provided_secrets = get_encoded_list(relation, self.component, PROV_SECRET_FIELDS) + if requested_secrets: + self._remote_secret_fields = requested_secrets + + if provided_secrets: + self._local_secret_fields = provided_secrets + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see spec.) + # All data involves: + # - databag + # - secrets content + # - secret labels (!!!) 
+ # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Full legacy stack for each operation + + def _legacy_apply_on_fetch(self) -> None: + """All legacy functions to be applied on fetch.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + + def _legacy_apply_on_update(self, fields) -> None: + """All legacy functions to be applied on update.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_migration_remove_secret_from_databag(relation, fields) + self._legacy_migration_remove_secret_field_name_from_databag(relation) + + def _legacy_apply_on_delete(self, fields) -> None: + """All legacy functions to be applied on delete.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_compat_check_deleted_label(relation, fields) + + # Compatibility + + @legacy_apply_from_version(18) + def _legacy_compat_check_deleted_label(self, relation, fields) -> None: + """Helper function for legacy behavior. + + As long as https://bugs.launchpad.net/juju/+bug/2028094 wasn't fixed, + we did not delete fields but rather kept them in the secret with a string value + expressing invalidity. This function is maintaining that behavior when needed. + """ + if not self.deleted_label: + return + + current_data = self.fetch_my_relation_data([relation.id], fields) + if current_data is not None: + # Check if the secret we want to delete actually exists + # Given the "deleted label", here we can't rely on the default mechanism (i.e.
'key not found') + if non_existent := (set(fields) & set(self.local_secret_fields)) - set( + current_data.get(relation.id, []) + ): + logger.debug( + "Non-existing secret %s was attempted to be removed.", + ", ".join(non_existent), + ) + + @legacy_apply_from_version(18) + def _legacy_compat_secret_uri_from_databag(self, relation) -> None: + """Fetching the secret URI from the databag, in case stored there.""" + self._legacy_secret_uri = relation.data[self.component].get( + self._generate_secret_field_name(), None + ) + + @legacy_apply_from_version(34) + def _legacy_compat_generate_prev_labels(self) -> None: + """Generator for legacy secret label names, for backwards compatibility. + + Secret label is part of the data that MUST be maintained across rolling upgrades. + In case there may be a change on a secret label, the old label must be recognized + after upgrades, and left intact until the first write operation -- when we roll over + to the new label. + + This function keeps "memory" of previously used secret labels. + NOTE: Return value takes decorator into account -- all 'legacy' functions may return `None` + + v0.34 (rev69): Fixing issue https://github.com/canonical/data-platform-libs/issues/155 + meant moving from '<app_name>.<scope>' (i.e. 'mysql.app', 'mysql.unit') + to labels '<relation_name>.<app_name>.<scope>' (like 'peer.mysql.app') + """ + if self._legacy_labels: + return + + result = [] + members = [self._model.app.name] + if self.scope: + members.append(self.scope.value) + result.append(f"{'.'.join(members)}") + self._legacy_labels = result + + # Migration + + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_from_databag(self, relation, fields: List[str]) -> None: + """For Rolling Upgrades -- when moving from databag to secrets usage. + + Practically what happens here is to remove stuff from the databag that is + to be stored in secrets.
+ """ + if not self.local_secret_fields: + return + + secret_fields_passed = set(self.local_secret_fields) & set(fields) + for field in secret_fields_passed: + if self._fetch_relation_data_without_secrets(self.component, relation, [field]): + self._delete_relation_data_without_secrets(self.component, relation, [field]) + + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_field_name_from_databag(self, relation) -> None: + """Making sure that the old databag URI is gone. + + This action should not be executed more than once. + + There was a phase (before moving secrets usage to libs) when charms saved the peer + secret URI to the databag, and used this URI from then on to retrieve their secret. + When upgrading to charm versions using this library, we need to add a label to the + secret and access it via label from than on, and remove the old traces from the databag. + """ + # Nothing to do if 'internal-secret' is not in the databag + if not (relation.data[self.component].get(self._generate_secret_field_name())): + return + + # Making sure that the secret receives its label + # (This should have happened by the time we get here, rather an extra security measure.) 
+ secret = self._get_relation_secret(relation.id) + + # Either app scope secret with leader executing, or unit scope secret + leader_or_unit_scope = self.component != self.local_app or self.local_unit.is_leader() + if secret and leader_or_unit_scope: + # Databag reference to the secret URI can be removed, now that it's labelled + relation.data[self.component].pop(self._generate_secret_field_name(), None) + + ########################################################################## + # Event handlers + ########################################################################## + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + pass + + def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: + """Event emitted when the secret has changed.""" + pass + + ########################################################################## + # Overrides of Relation Data handling functions + ########################################################################## + + def _generate_secret_label( + self, relation_name: str, relation_id: int, group_mapping: SecretGroup + ) -> str: + members = [relation_name, self._model.app.name] + if self.scope: + members.append(self.scope.value) + if group_mapping != SECRET_GROUPS.EXTRA: + members.append(group_mapping) + return f"{'.'.join(members)}" + + def _generate_secret_field_name(self, group_mapping: SecretGroup = SECRET_GROUPS.EXTRA) -> str: + """Generate unique group_mappings for secrets within a relation context.""" + return f"{self.secret_field_name}" + + @juju_secrets_only + def _get_relation_secret( + self, + relation_id: int, + group_mapping: SecretGroup = SECRET_GROUPS.EXTRA, + relation_name: Optional[str] = None, + ) -> Optional[CachedSecret]: + """Retrieve a Juju Secret specifically for peer relations. 
+ + In case this code may be executed within a rolling upgrade, and we may need to + migrate secrets from the databag to labels, we make sure to stick the correct + label on the secret, and clean up the local databag. + """ + if not relation_name: + relation_name = self.relation_name + + relation = self._model.get_relation(relation_name, relation_id) + if not relation: + return + + label = self._generate_secret_label(relation_name, relation_id, group_mapping) + + # URI or legacy label is only to be applied when moving single legacy secret to a (new) label + if group_mapping == SECRET_GROUPS.EXTRA: + # Fetching the secret with fallback to URI (in case label is not yet known) + # Label would be "stuck" on the secret in case it is found + return self.secrets.get( + label, self._legacy_secret_uri, legacy_labels=self._legacy_labels + ) + return self.secrets.get(label) + + def _get_group_secret_contents( + self, + relation: Relation, + group: SecretGroup, + secret_fields: Union[Set[str], List[str]] = [], + ) -> Dict[str, str]: + """Helper function to retrieve collective, requested contents of a secret.""" + secret_fields = [self._internal_name_to_field(k)[0] for k in secret_fields] + result = super()._get_group_secret_contents(relation, group, secret_fields) + if self.deleted_label: + result = {key: result[key] for key in result if result[key] != self.deleted_label} + if self._additional_secret_group_mapping: + return {self._field_to_internal_name(key, group): result[key] for key in result} + return result + + @either_static_or_dynamic_secrets + def _fetch_my_specific_relation_data( + self, relation: Relation, fields: Optional[List[str]] + ) -> Dict[str, str]: + """Fetch data available (directly or indirectly -- i.e.
secrets) from the relation for owner/this_app.""" + return self._fetch_relation_data_with_secrets( + self.component, self.local_secret_fields, relation, fields + ) + + @either_static_or_dynamic_secrets + def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: + """Update data available (directly or indirectly -- i.e. secrets) from the relation for owner/this_app.""" + self._load_secrets_from_databag(relation) + + _, normal_fields = self._process_secret_fields( + relation, + self.local_secret_fields, + list(data), + self._add_or_update_relation_secrets, + data=data, + uri_to_databag=False, + ) + + normal_content = {k: v for k, v in data.items() if k in normal_fields} + self._update_relation_data_without_secrets(self.component, relation, normal_content) + + @either_static_or_dynamic_secrets + def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: + """Delete data available (directly or indirectly -- i.e. secrets) from the relation for owner/this_app.""" + self._load_secrets_from_databag(relation) + if self.local_secret_fields and self.deleted_label: + _, normal_fields = self._process_secret_fields( + relation, + self.local_secret_fields, + fields, + self._update_relation_secret, + data=dict.fromkeys(fields, self.deleted_label), + ) + else: + _, normal_fields = self._process_secret_fields( + relation, + self.local_secret_fields, + fields, + self._delete_relation_secret, + fields=fields, + ) + self._delete_relation_data_without_secrets(self.component, relation, list(normal_fields)) + + def fetch_relation_data( + self, + relation_ids: Optional[List[int]] = None, + fields: Optional[List[str]] = None, + relation_name: Optional[str] = None, + ) -> Dict[int, Dict[str, str]]: + """This method makes no sense for a Peer Relation.""" + raise NotImplementedError( + "Peer Relation only supports 'self-side' fetch methods: " + "fetch_my_relation_data() and fetch_my_relation_field()" + ) + + def fetch_relation_field( + self,
relation_id: int, field: str, relation_name: Optional[str] = None + ) -> Optional[str]: + """This method makes no sense for a Peer Relation.""" + raise NotImplementedError( + "Peer Relation only supports 'self-side' fetch methods: " + "fetch_my_relation_data() and fetch_my_relation_field()" + ) + + ########################################################################## + # Public functions -- inherited + ########################################################################## + + fetch_my_relation_data = Data.fetch_my_relation_data + fetch_my_relation_field = Data.fetch_my_relation_field + + +class DataPeerEventHandlers(RequirerEventHandlers): + """Requires-side of the relation.""" + + def __init__(self, charm: CharmBase, relation_data: RequirerData, unique_key: str = ""): + """Manager of base client relations.""" + super().__init__(charm, relation_data, unique_key) + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + pass + + def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: + """Event emitted when the secret has changed.""" + pass + + +class DataPeer(DataPeerData, DataPeerEventHandlers): + """Represents peer relations.""" + + def __init__( + self, + charm, + relation_name: str, + additional_secret_fields: Optional[List[str]] = [], + additional_secret_group_mapping: Dict[str, str] = {}, + secret_field_name: Optional[str] = None, + deleted_label: Optional[str] = None, + unique_key: str = "", + ): + DataPeerData.__init__( + self, + charm.model, + relation_name, + additional_secret_fields, + additional_secret_group_mapping, + secret_field_name, + deleted_label, + ) + DataPeerEventHandlers.__init__(self, charm, self, unique_key) + + +class DataPeerUnitData(DataPeerData): + """Unit data abstraction representation.""" + + SCOPE = Scope.UNIT + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class DataPeerUnit(DataPeerUnitData, 
DataPeerEventHandlers): + """Unit databag representation.""" + + def __init__( + self, + charm, + relation_name: str, + additional_secret_fields: Optional[List[str]] = [], + additional_secret_group_mapping: Dict[str, str] = {}, + secret_field_name: Optional[str] = None, + deleted_label: Optional[str] = None, + unique_key: str = "", + ): + DataPeerData.__init__( + self, + charm.model, + relation_name, + additional_secret_fields, + additional_secret_group_mapping, + secret_field_name, + deleted_label, + ) + DataPeerEventHandlers.__init__(self, charm, self, unique_key) + + +class DataPeerOtherUnitData(DataPeerUnitData): + """Unit data abstraction representation.""" + + def __init__(self, unit: Unit, *args, **kwargs): + super().__init__(*args, **kwargs) + self.local_unit = unit + self.component = unit + + def update_relation_data(self, relation_id: int, data: dict) -> None: + """This method makes no sense for a Other Peer Relation.""" + raise NotImplementedError("It's not possible to update data of another unit.") + + def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: + """This method makes no sense for a Other Peer Relation.""" + raise NotImplementedError("It's not possible to delete data of another unit.") + + +class DataPeerOtherUnitEventHandlers(DataPeerEventHandlers): + """Requires-side of the relation.""" + + def __init__(self, charm: CharmBase, relation_data: DataPeerUnitData): + """Manager of base client relations.""" + unique_key = f"{relation_data.relation_name}-{relation_data.local_unit.name}" + super().__init__(charm, relation_data, unique_key=unique_key) + + +class DataPeerOtherUnit(DataPeerOtherUnitData, DataPeerOtherUnitEventHandlers): + """Unit databag representation for another unit than the executor.""" + + def __init__( + self, + unit: Unit, + charm: CharmBase, + relation_name: str, + additional_secret_fields: Optional[List[str]] = [], + additional_secret_group_mapping: Dict[str, str] = {}, + secret_field_name: Optional[str] 
= None, + deleted_label: Optional[str] = None, + ): + DataPeerOtherUnitData.__init__( + self, + unit, + charm.model, + relation_name, + additional_secret_fields, + additional_secret_group_mapping, + secret_field_name, + deleted_label, + ) + DataPeerOtherUnitEventHandlers.__init__(self, charm, self) + + +################################################################################ +# Cross-charm Relations Data Handling and Events +################################################################################ + +# Generic events + + +class RelationEventWithSecret(RelationEvent): + """Base class for Relation Events that need to handle secrets.""" + + @property + def _secrets(self) -> dict: + """Caching secrets to avoid fetching them each time a field is referred. + + DON'T USE the encapsulated helper variable outside of this function + """ + if not hasattr(self, "_cached_secrets"): + self._cached_secrets = {} + return self._cached_secrets + + def _get_secret(self, group) -> Optional[Dict[str, str]]: + """Retrieving secrets.""" + if not self.app: + return + if not self._secrets.get(group): + self._secrets[group] = None + secret_field = f"{PROV_SECRET_PREFIX}{group}" + if secret_uri := self.relation.data[self.app].get(secret_field): + secret = self.framework.model.get_secret(id=secret_uri) + self._secrets[group] = secret.get_content() + return self._secrets[group] + + @property + def secrets_enabled(self): + """Is this Juju version allowing for Secrets usage?""" + return JujuVersion.from_environ().has_secrets + + +class EntityProvidesEvent(RelationEvent): + """Base class for data events.""" + + @property + def extra_user_roles(self) -> Optional[str]: + """Returns the extra user roles that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("extra-user-roles") + + @property + def extra_group_roles(self) -> Optional[str]: + """Returns the extra group roles that were requested.""" + if not self.relation.app:
+ return None + + return self.relation.data[self.relation.app].get("extra-group-roles") + + @property + def entity_type(self) -> Optional[str]: + """Returns the entity_type that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("entity-type") + + @property + def entity_permissions(self) -> Optional[str]: + """Returns the entity_permissions that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("entity-permissions") + + +class EntityRequiresEvent(RelationEventWithSecret): + """Base class for authentication fields for events. + + The amount of logic added here is not ideal -- but this was the only way to preserve + the interface when moving to Juju Secrets + """ + + @property + def entity_name(self) -> Optional[str]: + """Returns the name for the created entity.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("entity") + if secret: + return secret.get("entity-name") + + return self.relation.data[self.relation.app].get("entity-name") + + @property + def entity_password(self) -> Optional[str]: + """Returns the password for the created entity.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("entity") + if secret: + return secret.get("entity-password") + + return self.relation.data[self.relation.app].get("entity-password") + + +class AuthenticationEvent(RelationEventWithSecret): + """Base class for authentication fields for events. 
+ + The amount of logic added here is not ideal -- but this was the only way to preserve + the interface when moving to Juju Secrets + """ + + @property + def username(self) -> Optional[str]: + """Returns the created username.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("user") + if secret: + return secret.get("username") + + return self.relation.data[self.relation.app].get("username") + + @property + def password(self) -> Optional[str]: + """Returns the password for the created user.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("user") + if secret: + return secret.get("password") + + return self.relation.data[self.relation.app].get("password") + + @property + def tls(self) -> Optional[str]: + """Returns whether TLS is configured.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("tls") + if secret: + return secret.get("tls") + + return self.relation.data[self.relation.app].get("tls") + + @property + def tls_ca(self) -> Optional[str]: + """Returns TLS CA.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("tls") + if secret: + return secret.get("tls-ca") + + return self.relation.data[self.relation.app].get("tls-ca") + + +# Database related events and fields + + +class DatabaseProvidesEvent(RelationEvent): + """Base class for database events.""" + + @property + def database(self) -> Optional[str]: + """Returns the database that was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("database") + + +class DatabaseRequestedEvent(DatabaseProvidesEvent): + """Event emitted when a new database is requested for use on this relation.""" + + @property + def extra_user_roles(self) -> Optional[str]: + """Returns the extra user roles that were requested.""" + if not self.relation.app: + return None + + 
return self.relation.data[self.relation.app].get("extra-user-roles") + + @property + def external_node_connectivity(self) -> bool: + """Returns the requested external_node_connectivity field.""" + if not self.relation.app: + return False + + return ( + self.relation.data[self.relation.app].get("external-node-connectivity", "false") + == "true" + ) + + @property + def requested_entity_secret_content(self) -> Optional[Dict[str, Optional[str]]]: + """Returns the content of the requested entity secret.""" + names = None + if secret_uri := self.relation.data.get(self.relation.app, {}).get( + "requested-entity-secret" + ): + secret = self.framework.model.get_secret(id=secret_uri) + if content := secret.get_content(refresh=True): + if "entity-name" in content: + names = {content["entity-name"]: content.get("password")} + else: + logger.warning("Invalid requested-entity-secret: no entity name") + return names + + @property + def prefix_matching(self) -> Optional[str]: + """Returns the prefix matching strategy that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("prefix-matching") + + +class DatabaseEntityRequestedEvent(DatabaseProvidesEvent, EntityProvidesEvent): + """Event emitted when a new entity is requested for use on this relation.""" + + +class DatabaseEntityPermissionsChangedEvent(DatabaseProvidesEvent, EntityProvidesEvent): + """Event emitted when existing entity permissions are changed on this relation.""" + + +class DatabaseProvidesEvents(CharmEvents): + """Database events. + + This class defines the events that the database can emit. 
+ """ + + database_requested = EventSource(DatabaseRequestedEvent) + database_entity_requested = EventSource(DatabaseEntityRequestedEvent) + database_entity_permissions_changed = EventSource(DatabaseEntityPermissionsChangedEvent) + + +class DatabaseRequiresEvent(RelationEventWithSecret): + """Base class for database events.""" + + @property + def database(self) -> Optional[str]: + """Returns the database name.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("database") + + @property + def endpoints(self) -> Optional[str]: + """Returns a comma separated list of read/write endpoints. + + In VM charms, this is the primary's address. + In kubernetes charms, this is the service to the primary pod. + """ + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("endpoints") + + @property + def read_only_endpoints(self) -> Optional[str]: + """Returns a comma separated list of read only endpoints. + + In VM charms, this is the address of all the secondary instances. + In kubernetes charms, this is the service to all replica pod instances. + """ + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("read-only-endpoints") + + @property + def replset(self) -> Optional[str]: + """Returns the replicaset name. + + MongoDB only. + """ + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("replset") + + @property + def uris(self) -> Optional[str]: + """Returns the connection URIs. + + MongoDB, Redis, OpenSearch. 
+ """ + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("user") + if secret: + return secret.get("uris") + + return self.relation.data[self.relation.app].get("uris") + + @property + def read_only_uris(self) -> Optional[str]: + """Returns the readonly connection URIs.""" + if not self.relation.app: + return None + + if self.secrets_enabled: + secret = self._get_secret("user") + if secret: + return secret.get("read-only-uris") + + return self.relation.data[self.relation.app].get("read-only-uris") + + @property + def version(self) -> Optional[str]: + """Returns the version of the database. + + Version as informed by the database daemon. + """ + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("version") + + @property + def prefix_databases(self) -> Optional[List[str]]: + """Returns a list of databases matching a prefix.""" + if not self.relation.app: + return None + + if prefixed_databases := self.relation.data[self.relation.app].get("prefix-databases"): + return prefixed_databases.split(",") + return [] + + +class DatabaseCreatedEvent(AuthenticationEvent, DatabaseRequiresEvent): + """Event emitted when a new database is created for use on this relation.""" + + +class DatabaseEntityCreatedEvent(EntityRequiresEvent, DatabaseRequiresEvent): + """Event emitted when a new entity is created for use on this relation.""" + + +class DatabaseEndpointsChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): + """Event emitted when the read/write endpoints are changed.""" + + +class DatabaseReadOnlyEndpointsChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): + """Event emitted when the read only endpoints are changed.""" + + +class DatabasePrefixDatabasesChangedEvent(AuthenticationEvent, DatabaseRequiresEvent): + """Event emitted when the prefix databases are changed.""" + + +class DatabaseRequiresEvents(RequirerCharmEvents): + """Database events. 
+ + This class defines the events that the database can emit. + """ + + database_created = EventSource(DatabaseCreatedEvent) + database_entity_created = EventSource(DatabaseEntityCreatedEvent) + endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) + read_only_endpoints_changed = EventSource(DatabaseReadOnlyEndpointsChangedEvent) + prefix_databases_changed = EventSource(DatabasePrefixDatabasesChangedEvent) + + +# Database Provider and Requires + + +class DatabaseProviderData(ProviderData): + """Provider-side data of the database relations.""" + + def __init__( + self, model: Model, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + super().__init__(model, relation_name, status_schema_path=status_schema_path) + + def set_database(self, relation_id: int, database_name: str) -> None: + """Set database name. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + database_name: database name. + """ + self.update_relation_data(relation_id, {"database": database_name}) + + def set_prefix_databases(self, relation_id: int, databases: List[str]) -> None: + """Set a coma separated list of databases matching a prefix. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + databases: list of database names matching the requested prefix. + """ + self.update_relation_data(relation_id, {"prefix-databases": ",".join(sorted(databases))}) + + def set_endpoints(self, relation_id: int, connection_strings: str) -> None: + """Set database primary connections. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + In VM charms, only the primary's address should be passed as an endpoint. 
+ In kubernetes charms, the service endpoint to the primary pod should be + passed as an endpoint. + + Args: + relation_id: the identifier for a particular relation. + connection_strings: database hosts and ports comma separated list. + """ + self.update_relation_data(relation_id, {"endpoints": connection_strings}) + + def set_read_only_endpoints(self, relation_id: int, connection_strings: str) -> None: + """Set database replicas connection strings. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + connection_strings: database hosts and ports comma separated list. + """ + self.update_relation_data(relation_id, {"read-only-endpoints": connection_strings}) + + def set_replset(self, relation_id: int, replset: str) -> None: + """Set replica set name in the application relation databag. + + MongoDB only. + + Args: + relation_id: the identifier for a particular relation. + replset: replica set name. + """ + self.update_relation_data(relation_id, {"replset": replset}) + + def set_uris(self, relation_id: int, uris: str) -> None: + """Set the database connection URIs in the application relation databag. + + MongoDB, Redis, and OpenSearch only. + + Args: + relation_id: the identifier for a particular relation. + uris: connection URIs. + """ + self.update_relation_data(relation_id, {"uris": uris}) + + def set_read_only_uris(self, relation_id: int, uris: str) -> None: + """Set the database readonly connection URIs in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + uris: connection URIs. + """ + self.update_relation_data(relation_id, {"read-only-uris": uris}) + + def set_version(self, relation_id: int, version: str) -> None: + """Set the database version in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + version: database version. 
+ """ + self.update_relation_data(relation_id, {"version": version}) + + def set_subordinated(self, relation_id: int) -> None: + """Raises the subordinated flag in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + """ + self.update_relation_data(relation_id, {"subordinated": "true"}) + + +class DatabaseProviderEventHandlers(ProviderEventHandlers): + """Provider-side of the database relation handlers.""" + + on = DatabaseProvidesEvents() # pyright: ignore [reportAssignmentType] + + def __init__( + self, charm: CharmBase, relation_data: DatabaseProviderData, unique_key: str = "" + ): + """Manager of base client relations.""" + super().__init__(charm, relation_data, unique_key) + # Just to calm down pyright, it can't parse that the same type is being used in the super() call above + self.relation_data = relation_data + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + super()._on_relation_changed_event(event) + # Leader only + if not self.relation_data.local_unit.is_leader(): + return + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Validate entity information is not dynamically changed + self._validate_entity_consistency(event, diff) + + # Emit a database requested event if the setup key (database name) + # was added to the relation databag, but the entity-type key was not. + if "database" in diff.added and "entity-type" not in diff.added: + getattr(self.on, "database_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an entity requested event if the setup key (database name) + # was added to the relation databag, in addition to the entity-type key. 
+ if "database" in diff.added and "entity-type" in diff.added: + getattr(self.on, "database_entity_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit a permissions changed event if the setup key (database name) + # was added to the relation databag, and the entity-permissions key changed. + if ( + "database" not in diff.added + and "entity-type" not in diff.added + and ("entity-permissions" in diff.added or "entity-permissions" in diff.changed) + ): + getattr(self.on, "database_entity_permissions_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: + """Event emitted when the secret has changed.""" + pass + + +class DatabaseProvides(DatabaseProviderData, DatabaseProviderEventHandlers): + """Provider-side of the database relations.""" + + def __init__( + self, charm: CharmBase, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + DatabaseProviderData.__init__( + self, charm.model, relation_name, status_schema_path=status_schema_path + ) + DatabaseProviderEventHandlers.__init__(self, charm, self) + + +class DatabaseRequirerData(RequirerData): + """Requirer-side of the database relation.""" + + def __init__( + self, + model: Model, + relation_name: str, + database_name: str, + extra_user_roles: Optional[str] = None, + relations_aliases: Optional[List[str]] = None, + additional_secret_fields: Optional[List[str]] = [], + external_node_connectivity: bool = False, + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + requested_entity_secret: Optional[str] = None, + requested_entity_name: Optional[str] = None, + requested_entity_password: Optional[str] = None, + prefix_matching: 
Optional[str] = None, + ): + """Manager of database client relations.""" + super().__init__( + model, + relation_name, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + requested_entity_secret, + requested_entity_name, + requested_entity_password, + prefix_matching, + ) + self.database = database_name + self.relations_aliases = relations_aliases + self.external_node_connectivity = external_node_connectivity + + def is_postgresql_plugin_enabled(self, plugin: str, relation_index: int = 0) -> bool: + """Returns whether a plugin is enabled in the database. + + Args: + plugin: name of the plugin to check. + relation_index: optional relation index to check the database + (default: 0 - first relation). + + PostgreSQL only. + """ + # Psycopg 3 is imported locally to avoid the need of its package installation + # when relating to a database charm other than PostgreSQL. + import psycopg + + # Return False if no relation is established. + if len(self.relations) == 0: + return False + + relation_id = self.relations[relation_index].id + host = self.fetch_relation_field(relation_id, "endpoints") + + # Return False if there is no endpoint available. 
+ if host is None: + return False + + host = host.split(":")[0] + + content = self.fetch_relation_data([relation_id], ["username", "password"]).get( + relation_id, {} + ) + user = content.get("username") + password = content.get("password") + + connection_string = ( + f"host='{host}' dbname='{self.database}' user='{user}' password='{password}'" + ) + try: + with psycopg.connect(connection_string) as connection: + with connection.cursor() as cursor: + cursor.execute( + "SELECT TRUE FROM pg_extension WHERE extname=%s::text;", (plugin,) + ) + return cursor.fetchone() is not None + except psycopg.Error as e: + logger.exception( + f"failed to check whether {plugin} plugin is enabled in the database: %s", str(e) + ) + return False + + +class DatabaseRequirerEventHandlers(RequirerEventHandlers): + """Requires-side of the relation.""" + + on = DatabaseRequiresEvents() # pyright: ignore [reportAssignmentType] + + def __init__( + self, charm: CharmBase, relation_data: DatabaseRequirerData, unique_key: str = "" + ): + """Manager of base client relations.""" + super().__init__(charm, relation_data, unique_key) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + # Define custom event names for each alias. + if self.relation_data.relations_aliases: + # Ensure the number of aliases does not exceed the maximum + # of connections allowed in the specific relation. + relation_connection_limit = self.charm.meta.requires[ + self.relation_data.relation_name + ].limit + if len(self.relation_data.relations_aliases) != relation_connection_limit: + raise ValueError( + f"The number of aliases must match the maximum number of connections allowed in the relation. 
" + f"Expected {relation_connection_limit}, got {len(self.relation_data.relations_aliases)}" + ) + + if self.relation_data.relations_aliases: + for relation_alias in self.relation_data.relations_aliases: + self.on.define_event( + f"{relation_alias}_database_created", + DatabaseCreatedEvent, + ) + self.on.define_event( + f"{relation_alias}_database_entity_created", + DatabaseEntityCreatedEvent, + ) + self.on.define_event( + f"{relation_alias}_endpoints_changed", + DatabaseEndpointsChangedEvent, + ) + self.on.define_event( + f"{relation_alias}_read_only_endpoints_changed", + DatabaseReadOnlyEndpointsChangedEvent, + ) + self.on.define_event( + f"{relation_alias}_prefix_databases_changed", + DatabasePrefixDatabasesChangedEvent, + ) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + pass + + def _assign_relation_alias(self, relation_id: int) -> None: + """Assigns an alias to a relation. + + This function writes in the unit data bag. + + Args: + relation_id: the identifier for a particular relation. + """ + # If no aliases were provided, return immediately. + if not self.relation_data.relations_aliases: + return + + # Return if an alias was already assigned to this relation + # (like when there are more than one unit joining the relation). + relation = self.charm.model.get_relation(self.relation_data.relation_name, relation_id) + if relation and relation.data[self.relation_data.local_unit].get("alias"): + return + + # Retrieve the available aliases (the ones that weren't assigned to any relation). 
+ available_aliases = self.relation_data.relations_aliases[:] + for relation in self.charm.model.relations[self.relation_data.relation_name]: + alias = relation.data[self.relation_data.local_unit].get("alias") + if alias: + logger.debug("Alias %s was already assigned to relation %d", alias, relation.id) + available_aliases.remove(alias) + + # Set the alias in the unit relation databag of the specific relation. + relation = self.charm.model.get_relation(self.relation_data.relation_name, relation_id) + if relation: + relation.data[self.relation_data.local_unit].update({"alias": available_aliases[0]}) + + # We need to set relation alias also on the application level so, + # it will be accessible in show-unit juju command, executed for a consumer application unit + if self.relation_data.local_unit.is_leader(): + self.relation_data.update_relation_data(relation_id, {"alias": available_aliases[0]}) + + def _emit_aliased_event(self, event: RelationChangedEvent, event_name: str) -> None: + """Emit an aliased event to a particular relation if it has an alias. + + Args: + event: the relation changed event that was received. + event_name: the name of the event to emit. + """ + alias = self._get_relation_alias(event.relation.id) + if alias: + getattr(self.on, f"{alias}_{event_name}").emit( + event.relation, app=event.app, unit=event.unit + ) + + def _get_relation_alias(self, relation_id: int) -> Optional[str]: + """Returns the relation alias. + + Args: + relation_id: the identifier for a particular relation. + + Returns: + the relation alias or None if the relation was not found. 
+ """ + for relation in self.charm.model.relations[self.relation_data.relation_name]: + if relation.id == relation_id: + return relation.data[self.relation_data.local_unit].get("alias") + return None + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the database relation is created.""" + super()._on_relation_created_event(event) + + # If relations aliases were provided, assign one to the relation. + self._assign_relation_alias(event.relation.id) + + # Sets both database and extra user roles in the relation + # if the roles are provided. Otherwise, sets only the database. + if not self.relation_data.local_unit.is_leader(): + return + + event_data = {"database": self.relation_data.database} + + if self.relation_data.extra_user_roles: + event_data["extra-user-roles"] = self.relation_data.extra_user_roles + if self.relation_data.extra_group_roles: + event_data["extra-group-roles"] = self.relation_data.extra_group_roles + if self.relation_data.entity_type: + event_data["entity-type"] = self.relation_data.entity_type + if self.relation_data.entity_permissions: + event_data["entity-permissions"] = self.relation_data.entity_permissions + if self.relation_data.requested_entity_secret: + event_data["requested-entity-secret"] = self.relation_data.requested_entity_secret + if self.relation_data.prefix_matching: + event_data["prefix-matching"] = self.relation_data.prefix_matching + + # Create helper secret if needed + if ( + self.relation_data.requested_entity_name + and not self.relation_data.requested_entity_secret + ): + content = {"entity-name": self.relation_data.requested_entity_name} + if self.relation_data.requested_entity_password: + content["password"] = self.relation_data.requested_entity_password + secret = self.charm.app.add_secret( + content, label=f"{self.model.uuid}-{event.relation.id}-requested-entity" + ) + secret.grant(event.relation) + if not secret.id: + raise SecretError("Secret helper missing Id") + 
event_data["requested-entity-secret"] = secret.id + + # set external-node-connectivity field + if self.relation_data.external_node_connectivity: + event_data["external-node-connectivity"] = "true" + + self.relation_data.update_relation_data(event.relation.id, event_data) + + def _clear_helper_secret(self, event: RelationChangedEvent, app_databag: Dict) -> None: + """Remove helper secret if set.""" + if ( + self.relation_data.local_unit.is_leader() + and self.relation_data.requested_entity_name + and (secret_uri := app_databag.get("requested-entity-secret")) + ): + try: + secret = self.framework.model.get_secret(id=secret_uri) + secret.remove_all_revisions() + except ModelError: + logger.debug("Unable to remove helper secret") + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the database relation has changed.""" + super()._on_relation_changed_event(event) + is_subordinate = False + remote_unit_data = None + for key in event.relation.data.keys(): + if isinstance(key, Unit) and not key.name.startswith(self.charm.app.name): + remote_unit_data = event.relation.data[key] + elif isinstance(key, Application) and key.name != self.charm.app.name: + is_subordinate = event.relation.data[key].get("subordinated") == "true" + + if is_subordinate: + if not remote_unit_data or remote_unit_data.get("state") != "ready": + return + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Register all new secrets with their labels + if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, diff.added) + + app_databag = get_encoded_dict(event.relation, event.app, "data") + if app_databag is None: + app_databag = {} + + # Check if the database is created + # (the database charm shared the credentials). 
+ if self._main_credentials_shared(diff) and "entity-type" not in app_databag: + # Emit the default event (the one without an alias). + logger.info("database created at %s", datetime.now()) + getattr(self.on, "database_created").emit( + event.relation, app=event.app, unit=event.unit + ) + + # Emit the aliased event (if any). + self._emit_aliased_event(event, "database_created") + self._clear_helper_secret(event, app_databag) + + # To avoid unnecessary application restarts do not trigger other events. + return + + if self._entity_credentials_shared(diff) and "entity-type" in app_databag: + # Emit the default event (the one without an alias). + logger.info("entity created at %s", datetime.now()) + getattr(self.on, "database_entity_created").emit( + event.relation, app=event.app, unit=event.unit + ) + + # Emit the aliased event (if any). + self._emit_aliased_event(event, "database_entity_created") + self._clear_helper_secret(event, app_databag) + + # To avoid unnecessary application restarts do not trigger other events. + return + + for key, event_name in [ + ("endpoints", "endpoints_changed"), + ("read-only-endpoints", "read_only_endpoints_changed"), + ("prefix-databases", "prefix_databases_changed"), + ]: + # Emit a change event if the key changed. + if key in diff.added or key in diff.changed: + # Emit the default event (the one without an alias). + logger.info("%s changed on %s", key, datetime.now()) + getattr(self.on, event_name).emit(event.relation, app=event.app, unit=event.unit) + + # Emit the aliased event (if any). + self._emit_aliased_event(event, event_name) + + # To avoid unnecessary application restarts do not trigger other events. 
+ return + + +class DatabaseRequires(DatabaseRequirerData, DatabaseRequirerEventHandlers): + """Provider-side of the database relations.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str, + database_name: str, + extra_user_roles: Optional[str] = None, + relations_aliases: Optional[List[str]] = None, + additional_secret_fields: Optional[List[str]] = [], + external_node_connectivity: bool = False, + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + requested_entity_secret: Optional[str] = None, + requested_entity_name: Optional[str] = None, + requested_entity_password: Optional[str] = None, + prefix_matching: Optional[str] = None, + ): + DatabaseRequirerData.__init__( + self, + charm.model, + relation_name, + database_name, + extra_user_roles, + relations_aliases, + additional_secret_fields, + external_node_connectivity, + extra_group_roles, + entity_type, + entity_permissions, + requested_entity_secret, + requested_entity_name, + requested_entity_password, + prefix_matching, + ) + DatabaseRequirerEventHandlers.__init__(self, charm, self) + + +################################################################################ +# Charm-specific Relations Data and Events +################################################################################ + +# Kafka Events + + +class KafkaProvidesEvent(RelationEventWithSecret): + """Base class for Kafka events.""" + + @property + def topic(self) -> Optional[str]: + """Returns the topic that was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("topic") + + @property + def consumer_group_prefix(self) -> Optional[str]: + """Returns the consumer-group-prefix that was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("consumer-group-prefix") + + @property + def mtls_cert(self) -> Optional[str]: + """Returns TLS cert of 
the client.""" + if not self.relation.app: + return None + + if not self.secrets_enabled: + raise SecretsUnavailableError("Secrets unavailable on current Juju version") + + secret_field = f"{PROV_SECRET_PREFIX}{SECRET_GROUPS.MTLS}" + if secret_uri := self.relation.data[self.app].get(secret_field): + secret = self.framework.model.get_secret(id=secret_uri) + content = secret.get_content(refresh=True) + if content: + return content.get("mtls-cert") + + +class KafkaClientMtlsCertUpdatedEvent(KafkaProvidesEvent): + """Event emitted when the mtls relation is updated.""" + + def __init__(self, handle, relation, old_mtls_cert: Optional[str] = None, app=None, unit=None): + super().__init__(handle, relation, app, unit) + + self.old_mtls_cert = old_mtls_cert + + def snapshot(self): + """Return a snapshot of the event.""" + return super().snapshot() | {"old_mtls_cert": self.old_mtls_cert} + + def restore(self, snapshot): + """Restore the event from a snapshot.""" + super().restore(snapshot) + self.old_mtls_cert = snapshot["old_mtls_cert"] + + +class TopicRequestedEvent(KafkaProvidesEvent): + """Event emitted when a new topic is requested for use on this relation.""" + + @property + def extra_user_roles(self) -> Optional[str]: + """Returns the extra user roles that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("extra-user-roles") + + +class TopicEntityRequestedEvent(KafkaProvidesEvent, EntityProvidesEvent): + """Event emitted when a new entity is requested for use on this relation.""" + + +class TopicEntityPermissionsChangedEvent(KafkaProvidesEvent, EntityProvidesEvent): + """Event emitted when existing entity permissions are changed on this relation.""" + + +class KafkaProvidesEvents(CharmEvents): + """Kafka events. + + This class defines the events that the Kafka can emit. 
+ """ + + topic_requested = EventSource(TopicRequestedEvent) + topic_entity_requested = EventSource(TopicEntityRequestedEvent) + topic_entity_permissions_changed = EventSource(TopicEntityPermissionsChangedEvent) + mtls_cert_updated = EventSource(KafkaClientMtlsCertUpdatedEvent) + + +class KafkaRequiresEvent(RelationEvent): + """Base class for Kafka events.""" + + @property + def topic(self) -> Optional[str]: + """Returns the topic.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("topic") + + @property + def bootstrap_server(self) -> Optional[str]: + """Returns a comma-separated list of broker uris.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("endpoints") + + @property + def consumer_group_prefix(self) -> Optional[str]: + """Returns the consumer-group-prefix.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("consumer-group-prefix") + + @property + def zookeeper_uris(self) -> Optional[str]: + """Returns a comma separated list of Zookeeper uris.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("zookeeper-uris") + + +class TopicCreatedEvent(AuthenticationEvent, KafkaRequiresEvent): + """Event emitted when a new topic is created for use on this relation.""" + + +class TopicEntityCreatedEvent(EntityRequiresEvent, KafkaRequiresEvent): + """Event emitted when a new entity is created for use on this relation.""" + + +class BootstrapServerChangedEvent(AuthenticationEvent, KafkaRequiresEvent): + """Event emitted when the bootstrap server is changed.""" + + +class KafkaRequiresEvents(RequirerCharmEvents): + """Kafka events. + + This class defines the events that the Kafka can emit. 
+ """ + + topic_created = EventSource(TopicCreatedEvent) + topic_entity_created = EventSource(TopicEntityCreatedEvent) + bootstrap_server_changed = EventSource(BootstrapServerChangedEvent) + + +# Kafka Provides and Requires + + +class KafkaProviderData(ProviderData): + """Provider-side of the Kafka relation.""" + + RESOURCE_FIELD = "topic" + + def __init__( + self, model: Model, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + super().__init__(model, relation_name, status_schema_path=status_schema_path) + + def set_topic(self, relation_id: int, topic: str) -> None: + """Set topic name in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + topic: the topic name. + """ + self.update_relation_data(relation_id, {"topic": topic}) + + def set_bootstrap_server(self, relation_id: int, bootstrap_server: str) -> None: + """Set the bootstrap server in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + bootstrap_server: the bootstrap server address. + """ + self.update_relation_data(relation_id, {"endpoints": bootstrap_server}) + + def set_consumer_group_prefix(self, relation_id: int, consumer_group_prefix: str) -> None: + """Set the consumer group prefix in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + consumer_group_prefix: the consumer group prefix string. + """ + self.update_relation_data(relation_id, {"consumer-group-prefix": consumer_group_prefix}) + + def set_zookeeper_uris(self, relation_id: int, zookeeper_uris: str) -> None: + """Set the zookeeper uris in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + zookeeper_uris: comma-separated list of ZooKeeper server uris. 
+ """ + self.update_relation_data(relation_id, {"zookeeper-uris": zookeeper_uris}) + + +class KafkaProviderEventHandlers(ProviderEventHandlers): + """Provider-side of the Kafka relation.""" + + on = KafkaProvidesEvents() # pyright: ignore [reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: KafkaProviderData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + super()._on_relation_changed_event(event) + + new_data_keys = list(event.relation.data[event.app].keys()) + if any(newval for newval in new_data_keys if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, new_data_keys) + + getattr(self.on, "mtls_cert_updated").emit(event.relation, app=event.app, unit=event.unit) + + # Leader only + if not self.relation_data.local_unit.is_leader(): + return + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Validate entity information is not dynamically changed + self._validate_entity_consistency(event, diff) + + # Emit a topic requested event if the setup key (topic name) + # was added to the relation databag, but the entity-type key was not. + if "topic" in diff.added and "entity-type" not in diff.added: + getattr(self.on, "topic_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an entity requested event if the setup key (topic name) + # was added to the relation databag, in addition to the entity-type key. 
+ if "topic" in diff.added and "entity-type" in diff.added: + getattr(self.on, "topic_entity_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit a permissions changed event if neither the setup key (topic name) + # nor the entity-type key was newly added, and the entity-permissions key was added or changed. + if ( + "topic" not in diff.added + and "entity-type" not in diff.added + and ("entity-permissions" in diff.added or "entity-permissions" in diff.changed) + ): + getattr(self.on, "topic_entity_permissions_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + if not event.secret.label: + return + + relation = self.relation_data._relation_from_secret_label(event.secret.label) + if not relation: + logging.info( + f"Received secret {event.secret.label} but couldn't parse, seems irrelevant" + ) + return + + if relation.app == self.charm.app: + logging.info("Secret changed event ignored for Secret Owner") + + if relation.name != self.relation_data.relation_name: + logger.debug( + "Ignoring secret-changed from endpoint %s (expected %s)", + relation.name, + self.relation_data.relation_name, + ) + return + + remote_unit = None + for unit in relation.units: + if unit.app != self.charm.app: + remote_unit = unit + + old_mtls_cert = event.secret.get_content().get("mtls-cert") + # mtls-cert is the only secret that can be updated + logger.info("mtls-cert updated") + getattr(self.on, "mtls_cert_updated").emit( + relation, app=relation.app, unit=remote_unit, old_mtls_cert=old_mtls_cert + ) + + +class KafkaProvides(KafkaProviderData, KafkaProviderEventHandlers): + """Provider-side of the Kafka relation.""" + + def __init__( + self, charm: CharmBase, relation_name: str,
status_schema_path: OptionalPathLike = None + ) -> None: + KafkaProviderData.__init__( + self, charm.model, relation_name, status_schema_path=status_schema_path + ) + KafkaProviderEventHandlers.__init__(self, charm, self) + + +class KafkaRequirerData(RequirerData): + """Requirer-side of the Kafka relation.""" + + def __init__( + self, + model: Model, + relation_name: str, + topic: str, + extra_user_roles: Optional[str] = None, + consumer_group_prefix: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + mtls_cert: Optional[str] = None, + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ): + """Manager of Kafka client relations.""" + super().__init__( + model, + relation_name, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + self.topic = topic + self.consumer_group_prefix = consumer_group_prefix or "" + self.mtls_cert = mtls_cert + + @staticmethod + def is_topic_value_acceptable(topic_value: str) -> bool: + """Check whether the given Kafka topic value is acceptable.""" + return "*" not in topic_value[:3] + + @property + def topic(self): + """Topic to use in Kafka.""" + return self._topic + + @topic.setter + def topic(self, value): + if not self.is_topic_value_acceptable(value): + raise ValueError(f"Error on topic '{value}', unacceptable value.") + self._topic = value + + def set_mtls_cert(self, relation_id: int, mtls_cert: str) -> None: + """Set the mtls cert in the application relation databag / secret. + + Args: + relation_id: the identifier for a particular relation. + mtls_cert: mtls cert. 
+ """ + self.update_relation_data(relation_id, {"mtls-cert": mtls_cert}) + + +class KafkaRequirerEventHandlers(RequirerEventHandlers): + """Requires-side of the Kafka relation.""" + + on = KafkaRequiresEvents() # pyright: ignore [reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: KafkaRequirerData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the Kafka relation is created.""" + super()._on_relation_created_event(event) + + if not self.relation_data.local_unit.is_leader(): + return + + # Sets topic, extra user roles, and "consumer-group-prefix" in the relation + relation_data = {"topic": self.relation_data.topic} + + if self.relation_data.mtls_cert: + relation_data["mtls-cert"] = self.relation_data.mtls_cert + + if self.relation_data.consumer_group_prefix: + relation_data["consumer-group-prefix"] = self.relation_data.consumer_group_prefix + + if self.relation_data.extra_user_roles: + relation_data["extra-user-roles"] = self.relation_data.extra_user_roles + if self.relation_data.extra_group_roles: + relation_data["extra-group-roles"] = self.relation_data.extra_group_roles + if self.relation_data.entity_type: + relation_data["entity-type"] = self.relation_data.entity_type + if self.relation_data.entity_permissions: + relation_data["entity-permissions"] = self.relation_data.entity_permissions + + self.relation_data.update_relation_data(event.relation.id, relation_data) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + pass + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the Kafka relation has changed.""" + super()._on_relation_changed_event(event) + + # Check which data has 
changed to emit customs events. + diff = self._diff(event) + + # Check if the topic is created + # (the Kafka charm shared the credentials). + + # Register all new secrets with their labels + if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, diff.added) + + app_databag = get_encoded_dict(event.relation, event.app, "data") + if app_databag is None: + app_databag = {} + + if self._main_credentials_shared(diff) and "entity-type" not in app_databag: + # Emit the default event (the one without an alias). + logger.info("topic created at %s", datetime.now()) + getattr(self.on, "topic_created").emit(event.relation, app=event.app, unit=event.unit) + + # To avoid unnecessary application restarts do not trigger other events. + return + + if self._entity_credentials_shared(diff) and "entity-type" in app_databag: + # Emit the default event (the one without an alias). + logger.info("entity created at %s", datetime.now()) + getattr(self.on, "topic_entity_created").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an endpoints (bootstrap-server) changed event if the Kafka endpoints + # added or changed this info in the relation databag. + if "endpoints" in diff.added or "endpoints" in diff.changed: + # Emit the default event (the one without an alias). + logger.info("endpoints changed on %s", datetime.now()) + getattr(self.on, "bootstrap_server_changed").emit( + event.relation, app=event.app, unit=event.unit + ) # here check if this is the right design + + # To avoid unnecessary application restarts do not trigger other events. 
+ return + + +class KafkaRequires(KafkaRequirerData, KafkaRequirerEventHandlers): + """Requirer-side of the Kafka relation.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str, + topic: str, + extra_user_roles: Optional[str] = None, + consumer_group_prefix: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + mtls_cert: Optional[str] = None, + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ) -> None: + KafkaRequirerData.__init__( + self, + charm.model, + relation_name, + topic, + extra_user_roles=extra_user_roles, + consumer_group_prefix=consumer_group_prefix, + additional_secret_fields=additional_secret_fields, + mtls_cert=mtls_cert, + extra_group_roles=extra_group_roles, + entity_type=entity_type, + entity_permissions=entity_permissions, + ) + KafkaRequirerEventHandlers.__init__(self, charm, self) + + +# Karapace related events + + +class KarapaceProvidesEvent(RelationEvent): + """Base class for Karapace events.""" + + @property + def subject(self) -> Optional[str]: + """Returns the subject that was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("subject") + + +class SubjectRequestedEvent(KarapaceProvidesEvent): + """Event emitted when a new subject is requested for use on this relation.""" + + @property + def extra_user_roles(self) -> Optional[str]: + """Returns the extra user roles that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("extra-user-roles") + + +class SubjectEntityRequestedEvent(KarapaceProvidesEvent, EntityProvidesEvent): + """Event emitted when a new entity is requested for use on this relation.""" + + +class SubjectEntityPermissionsChangedEvent(KarapaceProvidesEvent, EntityProvidesEvent): + """Event emitted when existing entity permissions are changed on this relation.""" + + +class
KarapaceProvidesEvents(CharmEvents): + """Karapace events. + + This class defines the events that the Karapace can emit. + """ + + subject_requested = EventSource(SubjectRequestedEvent) + subject_entity_requested = EventSource(SubjectEntityRequestedEvent) + subject_entity_permissions_changed = EventSource(SubjectEntityPermissionsChangedEvent) + + +class KarapaceRequiresEvent(RelationEvent): + """Base class for Karapace events.""" + + @property + def subject(self) -> Optional[str]: + """Returns the subject.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("subject") + + @property + def endpoints(self) -> Optional[str]: + """Returns a comma-separated list of broker uris.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("endpoints") + + +class SubjectAllowedEvent(AuthenticationEvent, KarapaceRequiresEvent): + """Event emitted when a new subject ACL is created for use on this relation.""" + + +class SubjectEntityCreatedEvent(EntityRequiresEvent, KarapaceRequiresEvent): + """Event emitted when a new entity is created for use on this relation.""" + + +class EndpointsChangedEvent(AuthenticationEvent, KarapaceRequiresEvent): + """Event emitted when the endpoints are changed.""" + + +class KarapaceRequiresEvents(RequirerCharmEvents): + """Karapace events. + + This class defines the events that Karapace can emit. 
+ """ + + subject_allowed = EventSource(SubjectAllowedEvent) + subject_entity_created = EventSource(SubjectEntityCreatedEvent) + server_changed = EventSource(EndpointsChangedEvent) + + +# Karapace Provides and Requires + + +class KarapaceProviderData(ProviderData): + """Provider-side of the Karapace relation.""" + + RESOURCE_FIELD = "subject" + + def __init__( + self, model: Model, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + super().__init__(model, relation_name, status_schema_path=status_schema_path) + + def set_subject(self, relation_id: int, subject: str) -> None: + """Set subject name in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + subject: the subject name. + """ + self.update_relation_data(relation_id, {"subject": subject}) + + def set_endpoint(self, relation_id: int, endpoint: str) -> None: + """Set the endpoint in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + endpoint: the server address. + """ + self.update_relation_data(relation_id, {"endpoints": endpoint}) + + +class KarapaceProviderEventHandlers(ProviderEventHandlers): + """Provider-side of the Karapace relation.""" + + on = KarapaceProvidesEvents() # pyright: ignore [reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: KarapaceProviderData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + super()._on_relation_changed_event(event) + + # Leader only + if not self.relation_data.local_unit.is_leader(): + return + + # Check which data has changed to emit customs events. 
+ diff = self._diff(event) + + # Validate entity information is not dynamically changed + self._validate_entity_consistency(event, diff) + + # Emit a subject requested event if the setup key (subject name) + # was added to the relation databag, but the entity-type key was not. + if "subject" in diff.added and "entity-type" not in diff.added: + getattr(self.on, "subject_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an entity requested event if the setup key (subject name) + # was added to the relation databag, in addition to the entity-type key. + if "subject" in diff.added and "entity-type" in diff.added: + getattr(self.on, "subject_entity_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit a permissions changed event if neither the setup key (subject name) + # nor the entity-type key was newly added, and the entity-permissions key was added or changed. + if ( + "subject" not in diff.added + and "entity-type" not in diff.added + and ("entity-permissions" in diff.added or "entity-permissions" in diff.changed) + ): + getattr(self.on, "subject_entity_permissions_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events.
+ return + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + pass + + +class KarapaceProvides(KarapaceProviderData, KarapaceProviderEventHandlers): + """Provider-side of the Karapace relation.""" + + def __init__( + self, charm: CharmBase, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + KarapaceProviderData.__init__( + self, charm.model, relation_name, status_schema_path=status_schema_path + ) + KarapaceProviderEventHandlers.__init__(self, charm, self) + + +class KarapaceRequirerData(RequirerData): + """Requirer-side of the Karapace relation.""" + + def __init__( + self, + model: Model, + relation_name: str, + subject: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ): + """Manager of Karapace client relations.""" + super().__init__( + model, + relation_name, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + self.subject = subject + + @property + def subject(self): + """Subject to use in Karapace.""" + return self._subject + + @subject.setter + def subject(self, value): + # Avoid wildcards + if value == "*": + raise ValueError(f"Error on subject '{value}', cannot be a wildcard.") + self._subject = value + + +class KarapaceRequirerEventHandlers(RequirerEventHandlers): + """Requires-side of the Karapace relation.""" + + on = KarapaceRequiresEvents() # pyright: ignore [reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: KarapaceRequirerData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance.
The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the Karapace relation is created.""" + super()._on_relation_created_event(event) + + if not self.relation_data.local_unit.is_leader(): + return + + # Sets subject and extra user roles + relation_data = {"subject": self.relation_data.subject} + + if self.relation_data.extra_user_roles: + relation_data["extra-user-roles"] = self.relation_data.extra_user_roles + if self.relation_data.extra_group_roles: + relation_data["extra-group-roles"] = self.relation_data.extra_group_roles + if self.relation_data.entity_type: + relation_data["entity-type"] = self.relation_data.entity_type + if self.relation_data.entity_permissions: + relation_data["entity-permissions"] = self.relation_data.entity_permissions + + self.relation_data.update_relation_data(event.relation.id, relation_data) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + pass + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the Karapace relation has changed.""" + super()._on_relation_changed_event(event) + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Check if the subject ACLs are created + # (the Karapace charm shared the credentials). + + # Register all new secrets with their labels + if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, diff.added) + + app_databag = get_encoded_dict(event.relation, event.app, "data") + if app_databag is None: + app_databag = {} + + if self._main_credentials_shared(diff) and "entity-type" not in app_databag: + # Emit the default event (the one without an alias). 
+ logger.info("subject ACL created at %s", datetime.now()) + getattr(self.on, "subject_allowed").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + if self._entity_credentials_shared(diff) and "entity-type" in app_databag: + # Emit the default event (the one without an alias). + logger.info("entity created at %s", datetime.now()) + getattr(self.on, "subject_entity_created").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an endpoints changed event if the Karapace endpoints added or changed + # this info in the relation databag. + if "endpoints" in diff.added or "endpoints" in diff.changed: + # Emit the default event (the one without an alias). + logger.info("endpoints changed on %s", datetime.now()) + getattr(self.on, "server_changed").emit( + event.relation, app=event.app, unit=event.unit + ) # here check if this is the right design + + # To avoid unnecessary application restarts do not trigger other events. 
+ return + + +class KarapaceRequires(KarapaceRequirerData, KarapaceRequirerEventHandlers): + """Requirer-side of the Karapace relation.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str, + subject: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ) -> None: + KarapaceRequirerData.__init__( + self, + charm.model, + relation_name, + subject, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + KarapaceRequirerEventHandlers.__init__(self, charm, self) + + +# Kafka Connect Events + + +class KafkaConnectProvidesEvent(RelationEvent): + """Base class for Kafka Connect Provider events.""" + + @property + def plugin_url(self) -> Optional[str]: + """Returns the REST endpoint URL which serves the connector plugin.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("plugin-url") + + +class IntegrationRequestedEvent(KafkaConnectProvidesEvent): + """Event emitted when a new integrator boots up and is ready to serve the connector plugin.""" + + +class KafkaConnectProvidesEvents(CharmEvents): + """Kafka Connect Provider Events.""" + + integration_requested = EventSource(IntegrationRequestedEvent) + + +class KafkaConnectRequiresEvent(AuthenticationEvent): + """Base class for Kafka Connect Requirer events.""" + + @property + def plugin_url(self) -> Optional[str]: + """Returns the REST endpoint URL which serves the connector plugin.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("plugin-url") + + +class IntegrationCreatedEvent(KafkaConnectRequiresEvent): + """Event emitted when the credentials are created for this integrator.""" + + +class IntegrationEndpointsChangedEvent(KafkaConnectRequiresEvent): + """Event emitted when Kafka
Connect REST endpoints change.""" + + +class KafkaConnectRequiresEvents(RequirerCharmEvents): + """Kafka Connect Requirer Events.""" + + integration_created = EventSource(IntegrationCreatedEvent) + integration_endpoints_changed = EventSource(IntegrationEndpointsChangedEvent) + + +class KafkaConnectProviderData(ProviderData): + """Provider-side of the Kafka Connect relation.""" + + RESOURCE_FIELD = "plugin-url" + + def __init__( + self, model: Model, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + super().__init__(model, relation_name, status_schema_path=status_schema_path) + + def set_endpoints(self, relation_id: int, endpoints: str) -> None: + """Sets REST endpoints of the Kafka Connect service.""" + self.update_relation_data(relation_id, {"endpoints": endpoints}) + + +class KafkaConnectProviderEventHandlers(EventHandlers): + """Provider-side implementation of the Kafka Connect event handlers.""" + + on = KafkaConnectProvidesEvents() # pyright: ignore [reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: KafkaConnectProviderData) -> None: + super().__init__(charm, relation_data) + self.relation_data = relation_data + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + # Leader only + if not self.relation_data.local_unit.is_leader(): + return + + # Check which data has changed to emit customs events. 
+ diff = self._diff(event) + + if "plugin-url" in diff.added: + getattr(self.on, "integration_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + pass + + +class KafkaConnectProvides(KafkaConnectProviderData, KafkaConnectProviderEventHandlers): + """Provider-side implementation of the Kafka Connect relation.""" + + def __init__( + self, charm: CharmBase, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + KafkaConnectProviderData.__init__( + self, charm.model, relation_name, status_schema_path=status_schema_path + ) + KafkaConnectProviderEventHandlers.__init__(self, charm, self) + + +# Sentinel value passed from Kafka Connect requirer side when it does not need to serve any plugins. +PLUGIN_URL_NOT_REQUIRED: Final[str] = "NOT-REQUIRED" + + +class KafkaConnectRequirerData(RequirerData): + """Requirer-side of the Kafka Connect relation.""" + + def __init__( + self, + model: Model, + relation_name: str, + plugin_url: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + ): + """Manager of Kafka Connect client relations.""" + super().__init__( + model, + relation_name, + extra_user_roles=extra_user_roles, + additional_secret_fields=additional_secret_fields, + ) + self.plugin_url = plugin_url + + @property + def plugin_url(self): + """The REST endpoint URL which serves the connector plugin.""" + return self._plugin_url + + @plugin_url.setter + def plugin_url(self, value): + self._plugin_url = value + + +class KafkaConnectRequirerEventHandlers(RequirerEventHandlers): + """Requirer-side of the Kafka Connect relation.""" + + on = KafkaConnectRequiresEvents() # pyright: ignore [reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: KafkaConnectRequirerData) -> None: + super().__init__(charm, relation_data) + self.relation_data = relation_data +
+ def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the Kafka Connect relation is created.""" + super()._on_relation_created_event(event) + + if not self.relation_data.local_unit.is_leader(): + return + + relation_data = {"plugin-url": self.relation_data.plugin_url} + self.relation_data.update_relation_data(event.relation.id, relation_data) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + pass + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the Kafka Connect relation has changed.""" + super()._on_relation_changed_event(event) + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Register all new secrets with their labels + if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, diff.added) + + if self._main_credentials_shared(diff): + logger.info("integration created at %s", datetime.now()) + getattr(self.on, "integration_created").emit( + event.relation, app=event.app, unit=event.unit + ) + return + + # Emit an endpoints changed event if the provider added or + # changed this info in the relation databag. + if "endpoints" in diff.added or "endpoints" in diff.changed: + # Emit the default event (the one without an alias). 
+ logger.info("endpoints changed on %s", datetime.now()) + getattr(self.on, "integration_endpoints_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + return + + +class KafkaConnectRequires(KafkaConnectRequirerData, KafkaConnectRequirerEventHandlers): + """Requirer-side implementation of the Kafka Connect relation.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str, + plugin_url: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + ) -> None: + KafkaConnectRequirerData.__init__( + self, + charm.model, + relation_name, + plugin_url, + extra_user_roles=extra_user_roles, + additional_secret_fields=additional_secret_fields, + ) + KafkaConnectRequirerEventHandlers.__init__(self, charm, self) + + +# Opensearch related events + + +class OpenSearchProvidesEvent(RelationEvent): + """Base class for OpenSearch events.""" + + @property + def index(self) -> Optional[str]: + """Returns the index that was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("index") + + +class IndexRequestedEvent(OpenSearchProvidesEvent): + """Event emitted when a new index is requested for use on this relation.""" + + @property + def extra_user_roles(self) -> Optional[str]: + """Returns the extra user roles that were requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("extra-user-roles") + + +class IndexEntityRequestedEvent(OpenSearchProvidesEvent, EntityProvidesEvent): + """Event emitted when a new entity is requested for use on this relation.""" + + +class IndexEntityPermissionsChangedEvent(OpenSearchProvidesEvent, EntityProvidesEvent): + """Event emitted when existing entity permissions are changed on this relation.""" + + +class OpenSearchProvidesEvents(CharmEvents): + """OpenSearch events. + + This class defines the events that OpenSearch can emit. 
+ """ + + index_requested = EventSource(IndexRequestedEvent) + index_entity_requested = EventSource(IndexEntityRequestedEvent) + index_entity_permissions_changed = EventSource(IndexEntityPermissionsChangedEvent) + + +class OpenSearchRequiresEvent(DatabaseRequiresEvent): + """Base class for OpenSearch requirer events.""" + + +class IndexCreatedEvent(AuthenticationEvent, OpenSearchRequiresEvent): + """Event emitted when a new index is created for use on this relation.""" + + +class IndexEntityCreatedEvent(EntityRequiresEvent, OpenSearchRequiresEvent): + """Event emitted when a new entity is created for use on this relation.""" + + +class OpenSearchRequiresEvents(RequirerCharmEvents): + """OpenSearch events. + + This class defines the events that the opensearch requirer can emit. + """ + + index_created = EventSource(IndexCreatedEvent) + index_entity_created = EventSource(IndexEntityCreatedEvent) + endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) + authentication_updated = EventSource(AuthenticationEvent) + + +# OpenSearch Provides and Requires Objects + + +class OpenSearchProvidesData(ProviderData): + """Provider-side of the OpenSearch relation.""" + + RESOURCE_FIELD = "index" + + def __init__( + self, model: Model, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + super().__init__(model, relation_name, status_schema_path=status_schema_path) + + def set_index(self, relation_id: int, index: str) -> None: + """Set the index in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + index: the index as it is _created_ on the provider charm. This needn't match the + requested index, and can be used to present a different index name if, for example, + the requested index is invalid. + """ + self.update_relation_data(relation_id, {"index": index}) + + def set_endpoints(self, relation_id: int, endpoints: str) -> None: + """Set the endpoints in the application relation databag.
+ + Args: + relation_id: the identifier for a particular relation. + endpoints: the endpoint addresses for opensearch nodes. + """ + self.update_relation_data(relation_id, {"endpoints": endpoints}) + + def set_version(self, relation_id: int, version: str) -> None: + """Set the opensearch version in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + version: database version. + """ + self.update_relation_data(relation_id, {"version": version}) + + +class OpenSearchProvidesEventHandlers(ProviderEventHandlers): + """Provider-side of the OpenSearch relation.""" + + on = OpenSearchProvidesEvents() # pyright: ignore[reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: OpenSearchProvidesData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + super()._on_relation_changed_event(event) + + # Leader only + if not self.relation_data.local_unit.is_leader(): + return + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Validate entity information is not dynamically changed + self._validate_entity_consistency(event, diff) + + # Emit an index requested event if the setup key (index name) + # was added to the relation databag, but the entity-type key was not. + if "index" in diff.added and "entity-type" not in diff.added: + getattr(self.on, "index_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an entity requested event if the setup key (index name) + # was added to the relation databag, in addition to the entity-type key. 
+ if "index" in diff.added and "entity-type" in diff.added: + getattr(self.on, "index_entity_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit a permissions changed event if neither the setup key (index name) + # nor the entity-type key was newly added, and the entity-permissions key was added or changed. + if ( + "index" not in diff.added + and "entity-type" not in diff.added + and ("entity-permissions" in diff.added or "entity-permissions" in diff.changed) + ): + getattr(self.on, "index_entity_permissions_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: + """Event notifying about a new value of a secret.""" + pass + + +class OpenSearchProvides(OpenSearchProvidesData, OpenSearchProvidesEventHandlers): + """Provider-side of the OpenSearch relation.""" + + def __init__( + self, charm: CharmBase, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + OpenSearchProvidesData.__init__( + self, charm.model, relation_name, status_schema_path=status_schema_path + ) + OpenSearchProvidesEventHandlers.__init__(self, charm, self) + + +class OpenSearchRequiresData(RequirerData): + """Requires data side of the OpenSearch relation.""" + + def __init__( + self, + model: Model, + relation_name: str, + index: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ): + """Manager of OpenSearch client relations.""" + super().__init__( + model, + relation_name, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + self.index = index + + +class
OpenSearchRequiresEventHandlers(RequirerEventHandlers): + """Requires events side of the OpenSearch relation.""" + + on = OpenSearchRequiresEvents() # pyright: ignore[reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: OpenSearchRequiresData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the OpenSearch relation is created.""" + super()._on_relation_created_event(event) + + if not self.relation_data.local_unit.is_leader(): + return + + # Sets both index and extra user roles in the relation if the roles are provided. + # Otherwise, sets only the index. + data = {"index": self.relation_data.index} + + if self.relation_data.extra_user_roles: + data["extra-user-roles"] = self.relation_data.extra_user_roles + if self.relation_data.extra_group_roles: + data["extra-group-roles"] = self.relation_data.extra_group_roles + if self.relation_data.entity_type: + data["entity-type"] = self.relation_data.entity_type + if self.relation_data.entity_permissions: + data["entity-permissions"] = self.relation_data.entity_permissions + + self.relation_data.update_relation_data(event.relation.id, data) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + if not event.secret.label: + return + + relation = self.relation_data._relation_from_secret_label(event.secret.label) + if not relation: + logging.info( + f"Received secret {event.secret.label} but couldn't parse, seems irrelevant" + ) + return + + if relation.name != self.relation_data.relation_name: + logger.debug( + "Ignoring secret-changed from endpoint %s (expected %s)", + relation.name, + self.relation_data.relation_name, + ) + return + + if relation.app == self.charm.app: + logging.info("Secret 
changed event ignored for Secret Owner") + + remote_unit = None + for unit in relation.units: + if unit.app != self.charm.app: + remote_unit = unit + + logger.info("authentication updated") + getattr(self.on, "authentication_updated").emit( + relation, app=relation.app, unit=remote_unit + ) + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the OpenSearch relation has changed. + + This event triggers individual custom events depending on the changing relation. + """ + super()._on_relation_changed_event(event) + + # Check which data has changed to emit customs events. + diff = self._diff(event) + + # Register all new secrets with their labels + if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, diff.added) + + secret_field_user = self.relation_data._generate_secret_field_name(SECRET_GROUPS.USER) + secret_field_tls = self.relation_data._generate_secret_field_name(SECRET_GROUPS.TLS) + updates = {"username", "password", "tls", "tls-ca", secret_field_user, secret_field_tls} + if len(set(diff._asdict().keys()) - updates) < len(diff): + logger.info("authentication updated at: %s", datetime.now()) + getattr(self.on, "authentication_updated").emit( + event.relation, app=event.app, unit=event.unit + ) + + app_databag = get_encoded_dict(event.relation, event.app, "data") + if app_databag is None: + app_databag = {} + + # Check if the index is created + # (the OpenSearch charm shares the credentials). + if self._main_credentials_shared(diff) and "entity-type" not in app_databag: + # Emit the default event (the one without an alias). + logger.info("index created at: %s", datetime.now()) + getattr(self.on, "index_created").emit(event.relation, app=event.app, unit=event.unit) + + # To avoid unnecessary application restarts do not trigger other events. 
+ return + + if self._entity_credentials_shared(diff) and "entity-type" in app_databag: + # Emit the default event (the one without an alias). + logger.info("entity created at: %s", datetime.now()) + getattr(self.on, "index_entity_created").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + # Emit an endpoints changed event if the OpenSearch application + # added or changed this info in the relation databag. + if "endpoints" in diff.added or "endpoints" in diff.changed: + # Emit the default event (the one without an alias). + logger.info("endpoints changed on %s", datetime.now()) + getattr(self.on, "endpoints_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + + # To avoid unnecessary application restarts do not trigger other events. + return + + +class OpenSearchRequires(OpenSearchRequiresData, OpenSearchRequiresEventHandlers): + """Requires-side of the OpenSearch relation.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str, + index: str, + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ) -> None: + OpenSearchRequiresData.__init__( + self, + charm.model, + relation_name, + index, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + OpenSearchRequiresEventHandlers.__init__(self, charm, self) + + +# Etcd related events + + +class EtcdProviderEvent(RelationEventWithSecret): + """Base class for Etcd events.""" + + @property + def prefix(self) -> Optional[str]: + """Returns the prefix that was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("prefix") + + @property + def mtls_cert(self) -> Optional[str]: + """Returns TLS cert of the client.""" + if
not self.relation.app: + return None + + if not self.secrets_enabled: + raise SecretsUnavailableError("Secrets unavailable on current Juju version") + + secret_field = f"{PROV_SECRET_PREFIX}{SECRET_GROUPS.MTLS}" + if secret_uri := self.relation.data[self.app].get(secret_field): + secret = self.framework.model.get_secret(id=secret_uri) + content = secret.get_content(refresh=True) + if content: + return content.get("mtls-cert") + + +class MTLSCertUpdatedEvent(EtcdProviderEvent): + """Event emitted when the mtls relation is updated.""" + + def __init__(self, handle, relation, old_mtls_cert: Optional[str] = None, app=None, unit=None): + super().__init__(handle, relation, app, unit) + + self.old_mtls_cert = old_mtls_cert + + def snapshot(self): + """Return a snapshot of the event.""" + return super().snapshot() | {"old_mtls_cert": self.old_mtls_cert} + + def restore(self, snapshot): + """Restore the event from a snapshot.""" + super().restore(snapshot) + self.old_mtls_cert = snapshot["old_mtls_cert"] + + +class EtcdProviderEvents(CharmEvents): + """Etcd events. + + This class defines the events that Etcd can emit. + """ + + mtls_cert_updated = EventSource(MTLSCertUpdatedEvent) + + +class EtcdReadyEvent(AuthenticationEvent, DatabaseRequiresEvent): + """Event emitted when the etcd relation is ready to be consumed.""" + + +class EtcdRequirerEvents(RequirerCharmEvents): + """Etcd events. + + This class defines the events that the etcd requirer can emit. 
+ """ + + endpoints_changed = EventSource(DatabaseEndpointsChangedEvent) + etcd_ready = EventSource(EtcdReadyEvent) + + +# Etcd Provides and Requires Objects + + +class EtcdProviderData(ProviderData): + """Provider-side of the Etcd relation.""" + + RESOURCE_FIELD = "prefix" + + def __init__( + self, model: Model, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + super().__init__(model, relation_name, status_schema_path=status_schema_path) + + def set_uris(self, relation_id: int, uris: str) -> None: + """Set the database connection URIs in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + uris: connection URIs. + """ + self.update_relation_data(relation_id, {"uris": uris}) + + def set_endpoints(self, relation_id: int, endpoints: str) -> None: + """Set the endpoints in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + endpoints: the endpoint addresses for etcd nodes "ip:port" format. + """ + self.update_relation_data(relation_id, {"endpoints": endpoints}) + + def set_version(self, relation_id: int, version: str) -> None: + """Set the etcd version in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + version: etcd API version. + """ + self.update_relation_data(relation_id, {"version": version}) + + def set_tls_ca(self, relation_id: int, tls_ca: str) -> None: + """Set the TLS CA in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + tls_ca: TLS certification authority. 
+ """ + self.update_relation_data(relation_id, {"tls-ca": tls_ca, "tls": "True"}) + + +class EtcdProviderEventHandlers(ProviderEventHandlers): + """Provider-side of the Etcd relation.""" + + on = EtcdProviderEvents() # pyright: ignore[reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: EtcdProviderData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the relation has changed.""" + super()._on_relation_changed_event(event) + # register all new secrets with their labels + new_data_keys = list(event.relation.data[event.app].keys()) + if any(newval for newval in new_data_keys if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, new_data_keys) + + # Check which data has changed to emit customs events. 
+ diff = self._diff(event) + + # Validate entity information is not dynamically changed + self._validate_entity_consistency(event, diff) + + getattr(self.on, "mtls_cert_updated").emit(event.relation, app=event.app, unit=event.unit) + return + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + if not event.secret.label: + return + + relation = self.relation_data._relation_from_secret_label(event.secret.label) + if not relation: + logging.info( + f"Received secret {event.secret.label} but couldn't parse, seems irrelevant" + ) + return + + if relation.name != self.relation_data.relation_name: + logger.debug( + "Ignoring secret-changed from endpoint %s (expected %s)", + relation.name, + self.relation_data.relation_name, + ) + return + + if relation.app == self.charm.app: + logging.info("Secret changed event ignored for Secret Owner") + + remote_unit = None + for unit in relation.units: + if unit.app != self.charm.app: + remote_unit = unit + + old_mtls_cert = event.secret.get_content().get("mtls-cert") + # mtls-cert is the only secret that can be updated + logger.info("mtls-cert updated") + getattr(self.on, "mtls_cert_updated").emit( + relation, app=relation.app, unit=remote_unit, old_mtls_cert=old_mtls_cert + ) + + +class EtcdProvides(EtcdProviderData, EtcdProviderEventHandlers): + """Provider-side of the Etcd relation.""" + + def __init__( + self, charm: CharmBase, relation_name: str, status_schema_path: OptionalPathLike = None + ) -> None: + EtcdProviderData.__init__( + self, charm.model, relation_name, status_schema_path=status_schema_path + ) + EtcdProviderEventHandlers.__init__(self, charm, self) + if not self.secrets_enabled: + raise SecretsUnavailableError("Secrets unavailable on current Juju version") + + +class EtcdRequirerData(RequirerData): + """Requires data side of the Etcd relation.""" + + def __init__( + self, + model: Model, + relation_name: str, + prefix: str, + mtls_cert: 
Optional[str], + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ): + """Manager of Etcd client relations.""" + super().__init__( + model, + relation_name, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + self.prefix = prefix + self.mtls_cert = mtls_cert + + def set_mtls_cert(self, relation_id: int, mtls_cert: str) -> None: + """Set the mtls cert in the application relation databag / secret. + + Args: + relation_id: the identifier for a particular relation. + mtls_cert: mtls cert. + """ + self.update_relation_data(relation_id, {"mtls-cert": mtls_cert}) + + +class EtcdRequirerEventHandlers(RequirerEventHandlers): + """Requires events side of the Etcd relation.""" + + on = EtcdRequirerEvents() # pyright: ignore[reportAssignmentType] + + def __init__(self, charm: CharmBase, relation_data: EtcdRequirerData) -> None: + super().__init__(charm, relation_data) + # Just to keep lint quiet, can't resolve inheritance. The same happened in super().__init__() above + self.relation_data = relation_data + + def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: + """Event emitted when the Etcd relation is created.""" + super()._on_relation_created_event(event) + + payload = { + "prefix": self.relation_data.prefix, + } + if self.relation_data.mtls_cert: + payload["mtls-cert"] = self.relation_data.mtls_cert + + self.relation_data.update_relation_data( + event.relation.id, + payload, + ) + + def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: + """Event emitted when the Etcd relation has changed. + + This event triggers individual custom events depending on the changing relation. + """ + super()._on_relation_changed_event(event) + + # Check which data has changed to emit customs events. 
+ diff = self._diff(event) + # Register all new secrets with their labels + if any(newval for newval in diff.added if self.relation_data._is_secret_field(newval)): + self.relation_data._register_secrets_to_relation(event.relation, diff.added) + + secret_field_user = self.relation_data._generate_secret_field_name(SECRET_GROUPS.USER) + secret_field_tls = self.relation_data._generate_secret_field_name(SECRET_GROUPS.TLS) + + # Emit a endpoints changed event if the etcd application added or changed this info + # in the relation databag. + if "endpoints" in diff.added or "endpoints" in diff.changed: + # Emit the default event (the one without an alias). + logger.info("endpoints changed on %s", datetime.now()) + getattr(self.on, "endpoints_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + + if ( + secret_field_tls in diff.added + or secret_field_tls in diff.changed + or secret_field_user in diff.added + or secret_field_user in diff.changed + or "username" in diff.added + or "username" in diff.changed + ): + # Emit the default event (the one without an alias). 
+ logger.info("etcd ready on %s", datetime.now()) + getattr(self.on, "etcd_ready").emit(event.relation, app=event.app, unit=event.unit) + + def _on_secret_changed_event(self, event: SecretChangedEvent): + """Event notifying about a new value of a secret.""" + if not event.secret.label: + return + + relation = self.relation_data._relation_from_secret_label(event.secret.label) + if not relation: + logging.info( + f"Received secret {event.secret.label} but couldn't parse, seems irrelevant" + ) + return + + if relation.app == self.charm.app: + logging.info("Secret changed event ignored for Secret Owner") + + if relation.name != self.relation_data.relation_name: + logger.debug( + "Ignoring secret-changed from endpoint %s (expected %s)", + relation.name, + self.relation_data.relation_name, + ) + return + + remote_unit = None + for unit in relation.units: + if unit.app != self.charm.app: + remote_unit = unit + + # secret-user or secret-tls updated + logger.info("etcd_ready updated") + getattr(self.on, "etcd_ready").emit(relation, app=relation.app, unit=remote_unit) + + +class EtcdRequires(EtcdRequirerData, EtcdRequirerEventHandlers): + """Requires-side of the Etcd relation.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str, + prefix: str, + mtls_cert: Optional[str], + extra_user_roles: Optional[str] = None, + additional_secret_fields: Optional[List[str]] = [], + extra_group_roles: Optional[str] = None, + entity_type: Optional[str] = None, + entity_permissions: Optional[str] = None, + ) -> None: + EtcdRequirerData.__init__( + self, + charm.model, + relation_name, + prefix, + mtls_cert, + extra_user_roles, + additional_secret_fields, + extra_group_roles, + entity_type, + entity_permissions, + ) + EtcdRequirerEventHandlers.__init__(self, charm, self) + if not self.secrets_enabled: + raise SecretsUnavailableError("Secrets unavailable on current Juju version") diff --git a/charms/garm-operator/lib/charms/grafana_k8s/v0/grafana_dashboard.py 
b/charms/garm-operator/lib/charms/grafana_k8s/v0/grafana_dashboard.py new file mode 100644 index 00000000..9886fc2b --- /dev/null +++ b/charms/garm-operator/lib/charms/grafana_k8s/v0/grafana_dashboard.py @@ -0,0 +1,2159 @@ +# Copyright 2021 Canonical Ltd. +# See LICENSE file for licensing details. + +"""## Overview. + +This document explains how to integrate with the Grafana charm +for the purpose of providing a dashboard which can be used by +end users. It also explains the structure of the data +expected by the `grafana-dashboard` interface, and may provide a +mechanism or reference point for providing a compatible interface +or library by providing a definitive reference guide to the +structure of relation data which is shared between the Grafana +charm and any charm providing datasource information. + +## Provider Library Usage + +The Grafana charm interacts with its dashboards using its charm +library. The goal of this library is to be as simple to use as +possible, and instantiation of the class with or without changing +the default arguments provides a complete use case. For the simplest +use case of a charm which bundles dashboards and provides a +`provides: grafana-dashboard` interface, + + requires: + grafana-dashboard: + interface: grafana_dashboard + +creation of a `GrafanaDashboardProvider` object with the default arguments is +sufficient. + +:class:`GrafanaDashboardProvider` expects that bundled dashboards should +be included in your charm with a default path of: + + path/to/charm.py + path/to/src/grafana_dashboards/*.{json|json.tmpl|.tmpl} + +Where the files are Grafana dashboard JSON data either from the +Grafana marketplace, or directly exported from a Grafana instance. +Refer to the [official docs](https://grafana.com/tutorials/provision-dashboards-and-data-sources/) +for more information. 
+ +When constructing a dashboard that is intended to be consumed by COS, make sure to use variables +for your datasources, and name them "prometheusds" and "lokids". You can also use the following +juju topology variables in your dashboards: $juju_model, $juju_model_uuid, $juju_application +and $juju_unit. Note, however, that if metrics are coming via peripheral charms (scrape-config +or cos-config) then topology labels would not exist. + +The default constructor arguments are: + + `charm`: `self` from the charm instantiating this library + `relation_name`: grafana-dashboard + `dashboards_path`: "/src/grafana_dashboards" + +If your configuration requires any changes from these defaults, they +may be set from the class constructor. It may be instantiated as +follows: + + from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider + + class FooCharm: + def __init__(self, *args): + super().__init__(*args) + ... + self.grafana_dashboard_provider = GrafanaDashboardProvider(self) + ... + +The first argument (`self`) should be a reference to the parent (providing +dashboards), as this charm's lifecycle events will be used to re-submit +dashboard information if a charm is upgraded, the pod is restarted, or other. + +An instantiated `GrafanaDashboardProvider` validates that the path specified +in the constructor (or the default) exists, reads the file contents, then +compresses them with LZMA and adds them to the application relation data +when a relation is established with Grafana. + +Provided dashboards will be checked by Grafana, and a series of dropdown menus +providing the ability to select query targets by Juju Model, application instance, +and unit will be added if they do not exist.
+ +To avoid requiring `jinja` in `GrafanaDashboardProvider` users, template validation +and rendering occur on the other side of the relation, and relation data in +the form of: + + { + "event": { + "valid": `true|false`, + "errors": [], + } + } + +Will be returned if rendering or validation fails. In this case, the +`GrafanaDashboardProvider` object will emit a `dashboard_status_changed` event +of the type :class:`GrafanaDashboardEvent`, which will contain information +about the validation error. + +This information is added to the relation data for the charms as serialized JSON +from a dict, with a structure of: +``` +{ + "application": { + "dashboards": { + "uuid": a uuid generated to ensure a relation event triggers, + "templates": { + "file:{hash}": { + "content": `{compressed_template_data}`, + "charm": `charm.meta.name`, + "juju_topology": { + "model": `charm.model.name`, + "model_uuid": `charm.model.uuid`, + "application": `charm.app.name`, + "unit": `charm.unit.name`, + } + }, + "file:{other_file_hash}": { + ... + }, + }, + }, + }, +} +``` + +This is ingested by :class:`GrafanaDashboardConsumer`, and is sufficient for configuration. + +The [COS Configuration Charm](https://charmhub.io/cos-configuration-k8s) can be used to +add dashboards which are not bundled with charms. + +## Consumer Library Usage + +The `GrafanaDashboardConsumer` object may be used by Grafana +charms to manage relations with available dashboards. For this +purpose, a charm consuming Grafana dashboard information should do +the following things: + +1. Instantiate the `GrafanaDashboardConsumer` object by providing it a +reference to the parent (Grafana) charm and, optionally, the name of +the relation that the Grafana charm uses to interact with dashboards. +This relation must conform to the `grafana-dashboard` interface.
+ +For example a Grafana charm may instantiate the +`GrafanaDashboardConsumer` in its constructor as follows + + from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardConsumer + + def __init__(self, *args): + super().__init__(*args) + ... + self.grafana_dashboard_consumer = GrafanaDashboardConsumer(self) + ... + +2. A Grafana charm also needs to listen to the +`GrafanaDashboardConsumer` events emitted by the `GrafanaDashboardConsumer` +by adding itself as an observer for these events: + + self.framework.observe( + self.grafana_source_consumer.on.sources_changed, + self._on_dashboards_changed, + ) + +Dashboards can be retrieved via the `dashboards` method: + +It will be returned in the format of: + +``` +[ + { + "id": unique_id, + "relation_id": relation_id, + "charm": the name of the charm which provided the dashboard, + "content": compressed_template_data + }, +] +``` + +The consuming charm should decompress the dashboard. +""" + +import hashlib +import json +import logging +import lzma +import os +import platform +import re +import subprocess +import tempfile +import uuid +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple +import yaml +from cosl import DashboardPath40UID, LZMABase64 +from cosl.types import type_convert_stored +from ops.charm import ( + CharmBase, + HookEvent, + RelationBrokenEvent, + RelationChangedEvent, + RelationCreatedEvent, + RelationEvent, + RelationRole, +) +from ops.framework import ( + EventBase, + EventSource, + Object, + ObjectEvents, + StoredState, +) +from ops.model import Relation + +# The unique Charmhub library identifier, never change it +LIBID = "c49eb9c7dfef40c7b6235ebd67010a3f" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version + +LIBPATCH = 49 + +PYDEPS = ["cosl >= 0.0.50"] + +logger = logging.getLogger(__name__) + 
+ +DEFAULT_RELATION_NAME = "grafana-dashboard" +DEFAULT_PEER_NAME = "grafana" +RELATION_INTERFACE_NAME = "grafana_dashboard" + +TOPOLOGY_TEMPLATE_DROPDOWNS = [ # type: ignore + { + "allValue": ".*", + "datasource": "${prometheusds}", + "definition": "label_values(up,juju_model)", + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "Juju model", + "multi": True, + "name": "juju_model", + "query": { + "query": "label_values(up,juju_model)", + "refId": "StandardVariableQuery", + }, + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": False, + }, + { + "allValue": ".*", + "datasource": "${prometheusds}", + "definition": 'label_values(up{juju_model=~"$juju_model"},juju_model_uuid)', + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "Juju model uuid", + "multi": True, + "name": "juju_model_uuid", + "query": { + "query": 'label_values(up{juju_model=~"$juju_model"},juju_model_uuid)', + "refId": "StandardVariableQuery", + }, + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": False, + }, + { + "allValue": ".*", + "datasource": "${prometheusds}", + "definition": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid"},juju_application)', + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "Juju application", + "multi": True, + "name": "juju_application", + "query": { + "query": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid"},juju_application)', + "refId": "StandardVariableQuery", + }, + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": False, + }, + { + "allValue": ".*", + "datasource": "${prometheusds}", + 
"definition": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},juju_unit)', + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "Juju unit", + "multi": True, + "name": "juju_unit", + "query": { + "query": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},juju_unit)', + "refId": "StandardVariableQuery", + }, + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": False, + }, +] + +DATASOURCE_TEMPLATE_DROPDOWNS = [ # type: ignore + { + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "Prometheus datasource", + "multi": True, + "name": "prometheusds", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "type": "datasource", + }, + { + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "Loki datasource", + "multi": True, + "name": "lokids", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "type": "datasource", + }, +] + +REACTIVE_CONVERTER = { # type: ignore + "allValue": None, + "datasource": "${prometheusds}", + "definition": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},host)', + "description": None, + "error": None, + "hide": 0, + "includeAll": True, + "label": "hosts", + "multi": True, + "name": "host", + "options": [], + "query": { + "query": 'label_values(up{juju_model=~"$juju_model",juju_model_uuid=~"$juju_model_uuid",juju_application=~"$juju_application"},host)', + "refId": "StandardVariableQuery", + }, + "refresh": 1, + "regex": "", + "skipUrlSync": False, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": False, +} 
+ + +class RelationNotFoundError(Exception): + """Raised if there is no relation with the given name.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as " "interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different direction.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +class InvalidDirectoryPathError(Exception): + """Raised if the grafana dashboards folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + grafana_dashboards_absolute_path: str, + message: str, + ): + self.grafana_dashboards_absolute_path = grafana_dashboards_absolute_path + self.message = message + + super().__init__(self.message) + + +def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: + """Resolve 
the provided path items against the directory of the main file. + + Look up the directory of the charmed operator file being executed. This is normally + going to be the charm.py file of the charm including this library. Then, resolve + the provided path elements and return its absolute path. + + Raises: + InvalidDirectoryPathError if the resolved path does not exist or it is not a directory + + """ + charm_dir = Path(str(charm.charm_dir)) + if not charm_dir.exists() or not charm_dir.is_dir(): + # Operator Framework does not currently expose a robust + # way to determine the top level charm source directory + # that is consistent across deployed charms and unit tests + # Hence for unit tests the current working directory is used + # TODO: update this logic when the following ticket is resolved + # https://github.com/canonical/operator/issues/643 + charm_dir = Path(os.getcwd()) + + dir_path = charm_dir.absolute().joinpath(*path_elements) + + if not dir_path.exists(): + raise InvalidDirectoryPathError(str(dir_path), "directory does not exist") + if not dir_path.is_dir(): + raise InvalidDirectoryPathError(str(dir_path), "is not a directory") + + return str(dir_path) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +) -> None: + """Verifies that a relation has the necessary characteristics. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `expected_relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`.
+ expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + named like the value of the `relation_name` argument. + RelationInterfaceMismatchError: If the relation interface of the + relation named as the provided `relation_name` argument does not + match the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation named as the provided `relation_name` + argument has a different role than what is specified by the + `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface and actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class CharmedDashboard: + """A helper class for handling dashboards on the requirer (Grafana) side.""" + + @classmethod + def _convert_dashboard_fields(cls, content: str, inject_dropdowns: bool = True) -> str: + """Make sure values are present for Juju topology. + + Inserts Juju topology variables and selectors into the template, as well as + a variable for Prometheus. 
+ """ + dict_content = json.loads(content) + datasources = {} + existing_templates = False + + template_dropdowns = ( + TOPOLOGY_TEMPLATE_DROPDOWNS + DATASOURCE_TEMPLATE_DROPDOWNS # type: ignore + if inject_dropdowns + else DATASOURCE_TEMPLATE_DROPDOWNS + ) + + # If the dashboard has __inputs, get the names to replace them. These are stripped + # from reactive dashboards in GrafanaDashboardAggregator, but charm authors in + # newer charms may import them directly from the marketplace + if "__inputs" in dict_content: + for field in dict_content["__inputs"]: + if "type" in field and field["type"] == "datasource": + datasources[field["name"]] = field["pluginName"].lower() + del dict_content["__inputs"] + + # If no existing template variables exist, just insert our own + if "templating" not in dict_content: + dict_content["templating"] = {"list": list(template_dropdowns)} # type: ignore + else: + # Otherwise, set a flag so we can go back later + existing_templates = True + for template_value in dict_content["templating"]["list"]: + # Build a list of `datasource_name`: `datasource_type` mappings + # The "query" field is actually "prometheus", "loki", "influxdb", etc + if "type" in template_value and template_value["type"] == "datasource": + datasources[template_value["name"]] = template_value["query"].lower() + + # Put our own variables in the template + # We only want to inject our own dropdowns IFF they are NOT + # already in the template coming over relation data. + # We'll store all dropdowns in the template from the provider + # in a set. We'll add our own if they are not in this set. 
+ existing_names = { + item.get("name") + for item in dict_content["templating"]["list"] + } + + for d in template_dropdowns: # type: ignore + if d.get("name") not in existing_names: + dict_content["templating"]["list"].insert(0, d) + existing_names.add(d.get("name")) + + dict_content = cls._replace_template_fields(dict_content, datasources, existing_templates) + return json.dumps(dict_content) + + @classmethod + def _replace_template_fields( # noqa: C901 + cls, dict_content: dict, datasources: dict, existing_templates: bool + ) -> dict: + """Make templated fields get cleaned up afterwards. + + If existing datasource variables are present, try to substitute them. + """ + replacements = {"loki": "${lokids}", "prometheus": "${prometheusds}"} + used_replacements = [] # type: List[str] + + # If any existing datasources match types we know, or we didn't find + # any templating variables at all, template them. + if datasources or not existing_templates: + panels = dict_content.get("panels", {}) + if panels: + dict_content["panels"] = cls._template_panels( + panels, replacements, used_replacements, existing_templates, datasources + ) + + # Find panels nested under rows + rows = dict_content.get("rows", {}) + if rows: + for row_idx, row in enumerate(rows): + if "panels" in row.keys(): + rows[row_idx]["panels"] = cls._template_panels( + row["panels"], + replacements, + used_replacements, + existing_templates, + datasources, + ) + + dict_content["rows"] = rows + + # Finally, go back and pop off the templates we stubbed out + deletions = [] + for tmpl in dict_content["templating"]["list"]: + if tmpl["name"] and tmpl["name"] in used_replacements: + # it might happen that existing template var name is the same as the one we insert (i.e prometheusds or lokids) + # in that case, we want to pop the existing one only. 
+ if tmpl not in DATASOURCE_TEMPLATE_DROPDOWNS: + deletions.append(tmpl) + + for d in deletions: + dict_content["templating"]["list"].remove(d) + + return dict_content + + @classmethod + def _template_panels( + cls, + panels: dict, + replacements: dict, + used_replacements: list, + existing_templates: bool, + datasources: dict, + ) -> dict: + """Iterate through a `panels` object and template it appropriately.""" + # Go through all the panels. If they have a datasource set, AND it's one + # that we can convert to ${lokids} or ${prometheusds}, by stripping off the + # ${} templating and comparing the name to the list we built, replace it, + # otherwise, leave it alone. + # + for panel in panels: + if "datasource" not in panel or not panel.get("datasource"): + continue + if not existing_templates: + datasource = panel.get("datasource") + if isinstance(datasource, str): + if "loki" in datasource: + panel["datasource"] = "${lokids}" + elif "grafana" in datasource: + continue + else: + panel["datasource"] = "${prometheusds}" + elif isinstance(datasource, dict): + # In dashboards exported by Grafana 9, datasource type is dict + dstype = datasource.get("type", "") + if dstype == "loki": + panel["datasource"]["uid"] = "${lokids}" + elif dstype == "prometheus": + panel["datasource"]["uid"] = "${prometheusds}" + else: + logger.debug("Unrecognized datasource type '%s'; skipping", dstype) + continue + else: + logger.error("Unknown datasource format: skipping") + continue + else: + if isinstance(panel["datasource"], str): + if panel["datasource"].lower() in replacements.values(): + # Already a known template variable + continue + # Strip out variable characters and maybe braces + ds = re.sub(r"(\$|\{|\})", "", panel["datasource"]) + + if ds not in datasources.keys(): + # Unknown, non-templated datasource, potentially a Grafana builtin + continue + + replacement = replacements.get(datasources[ds], "") + if replacement: + used_replacements.append(ds) + panel["datasource"] = 
replacement or panel["datasource"] + elif isinstance(panel["datasource"], dict): + dstype = panel["datasource"].get("type", "") + if panel["datasource"].get("uid", "").lower() in replacements.values(): + # Already a known template variable + continue + # Strip out variable characters and maybe braces + ds = re.sub(r"(\$|\{|\})", "", panel["datasource"].get("uid", "")) + + if ds not in datasources.keys(): + # Unknown, non-templated datasource, potentially a Grafana builtin + continue + + replacement = replacements.get(datasources[ds], "") + if replacement: + used_replacements.append(ds) + panel["datasource"]["uid"] = replacement + else: + logger.error("Unknown datasource format: skipping") + continue + return panels + + @classmethod + def _inject_labels(cls, content: str, topology: dict, transformer: "CosTool") -> str: + """Inject Juju topology into panel expressions via CosTool. + + A dashboard will have a structure approximating: + { + "__inputs": [], + "templating": { + "list": [ + { + "name": "prometheusds", + "type": "prometheus" + } + ] + }, + "panels": [ + { + "foo": "bar", + "targets": [ + { + "some": "field", + "expr": "up{job="foo"}" + }, + { + "some_other": "field", + "expr": "sum(http_requests_total{instance="$foo"}[5m])} + } + ], + "datasource": "${someds}" + } + ] + } + + `templating` is used elsewhere in this library, but the structure is not rigid. It is + not guaranteed that a panel will actually have any targets (it could be a "spacer" with + no datasource, hence no expression). It could have only one target. It could have multiple + targets. It could have multiple targets of which only one has an `expr` to evaluate. We need + to try to handle all of these concisely. 
+ + `cos-tool` (`github.com/canonical/cos-tool` as a Go module in general) + does not know "Grafana-isms", such as using `[$_variable]` to modify the query from the user + interface, so we add placeholders (as `5y`, since it must parse, but a dashboard looking for + five years for a panel query would be unusual). + + Args: + content: dashboard content as a string + topology: a dict containing topology values + transformer: a 'CosTool' instance + Returns: + dashboard content with replaced values. + """ + dict_content = json.loads(content) + + if "panels" not in dict_content.keys(): + return json.dumps(dict_content) + + # Go through all the panels and inject topology labels + # Panels may have more than one 'target' where the expressions live, so that must be + # accounted for. Additionally, `promql-transform` does not necessarily gracefully handle + # expressions with range queries including variables. Exclude these. + # + # It is not a certainty that the `datasource` field will necessarily reflect the type, so + # operate on all fields. + panels = dict_content["panels"] + topology_with_prefix = {"juju_{}".format(k): v for k, v in topology.items()} + + # We need to use an index so we can insert the changed element back later + for panel_idx, panel in enumerate(panels): + if not isinstance(panel, dict): + continue + + # Use the index to insert it back in the same location + panels[panel_idx] = cls._modify_panel(panel, topology_with_prefix, transformer) + + return json.dumps(dict_content) + + @classmethod + def _modify_panel(cls, panel: dict, topology: dict, transformer: "CosTool") -> dict: + """Inject Juju topology into panel expressions via CosTool. 
+ + Args: + panel: a dashboard panel as a dict + topology: a dict containing topology values + transformer: a 'CosTool' instance + Returns: + the panel with injected values + """ + if "targets" not in panel.keys(): + return panel + + # Pre-compile a regular expression to grab values from inside of [] + range_re = re.compile(r"\[(?P.*?)\]") + # Do the same for any offsets + offset_re = re.compile(r"offset\s+(?P-?\s*[$\w]+)") + + known_datasources = {"${prometheusds}": "promql", "${lokids}": "logql"} + + targets = panel["targets"] + + # We need to use an index so we can insert the changed element back later + for idx, target in enumerate(targets): + # If there's no expression, we don't need to do anything + if "expr" not in target.keys(): + continue + expr = target["expr"] + + if "datasource" not in panel.keys(): + continue + + if isinstance(panel["datasource"], str): + if panel["datasource"] not in known_datasources: + continue + querytype = known_datasources[panel["datasource"]] + elif isinstance(panel["datasource"], dict): + if panel["datasource"]["uid"] not in known_datasources: + continue + querytype = known_datasources[panel["datasource"]["uid"]] + else: + logger.error("Unknown datasource format: skipping") + continue + + # Capture all values inside `[]` into a list which we'll iterate over later to + # put them back in-order. Then apply the regex again and replace everything with + # `[5y]` so promql/parser will take it. + # + # Then do it again for offsets + range_values = [m.group("value") for m in range_re.finditer(expr)] + expr = range_re.sub(r"[5y]", expr) + + offset_values = [m.group("value") for m in offset_re.finditer(expr)] + expr = offset_re.sub(r"offset 5y", expr) + # Retrieve the new expression (which may be unchanged if there were no label + # matchers in the expression, or if tt was unable to be parsed like logql. 
It's + # virtually impossible to tell from any datasource "name" in a panel what the + # actual type is without re-implementing a complete dashboard parser, but no + # harm will some from passing invalid promql -- we'll just get the original back. + # + replacement = transformer.inject_label_matchers(expr, topology, querytype) + + if replacement == target["expr"]: + # promql-transform caught an error. Move on + continue + + # Go back and substitute values in [] which were pulled out + # Enumerate with an index... again. The same regex is ok, since it will still match + # `[(.*?)]`, which includes `[5y]`, our placeholder + for i, match in enumerate(range_re.finditer(replacement)): + # Replace one-by-one, starting from the left. We build the string back with + # `str.replace(string_to_replace, replacement_value, count)`. Limit the count + # to one, since we are going through one-by-one through the list we saved earlier + # in `range_values`. + replacement = replacement.replace( + "[{}]".format(match.group("value")), + "[{}]".format(range_values[i]), + 1, + ) + + for i, match in enumerate(offset_re.finditer(replacement)): + # Replace one-by-one, starting from the left. We build the string back with + # `str.replace(string_to_replace, replacement_value, count)`. Limit the count + # to one, since we are going through one-by-one through the list we saved earlier + # in `range_values`. + replacement = replacement.replace( + "offset {}".format(match.group("value")), + "offset {}".format(offset_values[i]), + 1, + ) + + # Use the index to insert it back in the same location + targets[idx]["expr"] = replacement + + panel["targets"] = targets + return panel + + @classmethod + def _content_to_dashboard_object( + cls, + *, + charm_name, + content: str, + juju_topology: dict, + inject_dropdowns: bool = True, + dashboard_alt_uid: Optional[str] = None, + ) -> Dict: + """Helper method for keeping a consistent stored state schema for the dashboard and some metadata. 
+ + Args: + charm_name: Charm name (although the aggregator passes the app name). + content: The compressed dashboard. + juju_topology: This is not actually used in the dashboards, but is present to provide a secondary + salt to ensure uniqueness in the dict keys in case individual charm units provide dashboards. + inject_dropdowns: Whether to auto-render topology dropdowns. + dashboard_alt_uid: Alternative uid used for dashboards added programmatically. + """ + ret = { + "charm": charm_name, + "content": content, + "juju_topology": juju_topology if inject_dropdowns else {}, + "inject_dropdowns": inject_dropdowns, + } + + if dashboard_alt_uid is not None: + ret["dashboard_alt_uid"] = dashboard_alt_uid + + return ret + + @classmethod + def _generate_alt_uid(cls, charm_name: str, key: str) -> str: + """Generate alternative uid for dashboards. + + Args: + charm_name: The name of the charm (not app; from metadata). + key: A string used (along with charm.meta.name) to build the hash uid. + + Returns: A hash string. + """ + raw_dashboard_alt_uid = "{}-{}".format(charm_name, key) + return hashlib.shake_256(raw_dashboard_alt_uid.encode("utf-8")).hexdigest(8) + + @classmethod + def _replace_uid( + cls, *, dashboard_dict: dict, dashboard_path: Path, charm_dir: Path, charm_name: str + ): + # If we're running this from within an aggregator (such as grafana agent), then the uid was + # already rendered there, so we do not want to overwrite it with a uid generated from aggregator's info. + # We overwrite the uid only if it's not a valid "Path40" uid. 
+ original_uid = dashboard_dict.get("uid", "") + + if DashboardPath40UID.is_valid(original_uid): + logger.debug( + "Processed dashboard '%s': kept original uid '%s'", dashboard_path, original_uid + ) + return + + try: + rel_path = str( + dashboard_path.relative_to(charm_dir) + if dashboard_path.is_absolute() + else dashboard_path + ) + except ValueError: + uid = DashboardPath40UID.generate(charm_name, str(dashboard_path)) + else: + uid = DashboardPath40UID.generate(charm_name, rel_path) + + + logger.debug( + "Processed dashboard '%s': replaced original uid '%s' with '%s'", + dashboard_path, + original_uid, + uid, + ) + dashboard_dict["uid"] = uid + + @classmethod + def _add_tags(cls, dashboard_dict: dict, charm_name: str): + tags: List[str] = dashboard_dict.get("tags", []) + if not any(tag.startswith("charm: ") for tag in tags): + tags.append(f"charm: {charm_name}") + dashboard_dict["tags"] = tags + + @classmethod + def load_dashboards_from_dir( + cls, + *, + dashboards_path: Path, + charm_name: str, + charm_dir: Path, + inject_dropdowns: bool, + juju_topology: dict, + path_filter: Callable[[Path], bool] = lambda p: True, + ) -> dict: + """Load dashboards files from directory into a mapping from "dashboard id" to a so-called "dashboard object".""" + + # Path.glob uses fnmatch on the backend, which is pretty limited, so use a + # custom function for the filter + def _is_dashboard(p: Path) -> bool: + return ( + p.is_file() + and p.name.endswith((".json", ".json.tmpl", ".tmpl")) + and path_filter(p) + ) + + dashboard_templates = {} + + for path in filter(_is_dashboard, Path(dashboards_path).glob("**/*")): + try: + dashboard_dict = json.loads(path.read_bytes()) + except json.JSONDecodeError as e: + logger.error("Failed to load dashboard '%s': %s", path, e) + continue + if type(dashboard_dict) is not dict: + logger.error( + "Invalid dashboard '%s': expected dict, got %s", path, type(dashboard_dict) + ) + + cls._replace_uid( + dashboard_dict=dashboard_dict, + 
dashboard_path=path, + charm_dir=charm_dir, + charm_name=charm_name, + ) + + cls._add_tags(dashboard_dict=dashboard_dict, charm_name=charm_name) + + id = "file:{}".format(path.stem) + dashboard_templates[id] = cls._content_to_dashboard_object( + charm_name=charm_name, + content=LZMABase64.compress(json.dumps(dashboard_dict)), + dashboard_alt_uid=cls._generate_alt_uid(charm_name, id), + inject_dropdowns=inject_dropdowns, + juju_topology=juju_topology, + ) + + return dashboard_templates + + +class GrafanaDashboardsChanged(EventBase): + """Event emitted when Grafana dashboards change.""" + + def __init__(self, handle, data=None): + super().__init__(handle) + self.data = data + + def snapshot(self) -> Dict: + """Save grafana source information.""" + return {"data": self.data} + + def restore(self, snapshot): + """Restore grafana source information.""" + self.data = snapshot["data"] + + +class GrafanaDashboardEvents(ObjectEvents): + """Events raised by :class:`GrafanaSourceEvents`.""" + + dashboards_changed = EventSource(GrafanaDashboardsChanged) + + +class GrafanaDashboardEvent(EventBase): + """Event emitted when Grafana dashboards cannot be resolved. + + Enables us to set a clear status on the provider. 
+ """ + + def __init__(self, handle, errors: List[Dict[str, str]] = [], valid: bool = False): + super().__init__(handle) + self.errors = errors + self.error_message = "; ".join([error["error"] for error in errors if "error" in error]) + self.valid = valid + + def snapshot(self) -> Dict: + """Save grafana source information.""" + return { + "error_message": self.error_message, + "valid": self.valid, + "errors": json.dumps(self.errors), + } + + def restore(self, snapshot): + """Restore grafana source information.""" + self.error_message = snapshot["error_message"] + self.valid = snapshot["valid"] + self.errors = json.loads(str(snapshot["errors"])) + + +class GrafanaProviderEvents(ObjectEvents): + """Events raised by :class:`GrafanaSourceEvents`.""" + + dashboard_status_changed = EventSource(GrafanaDashboardEvent) + + +class GrafanaDashboardProvider(Object): + """An API to provide Grafana dashboards to a Grafana charm.""" + + _stored = StoredState() + on = GrafanaProviderEvents() # pyright: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + dashboards_path: str = "src/grafana_dashboards", + ) -> None: + """API to provide Grafana dashboard to a Grafana charmed operator. + + The :class:`GrafanaDashboardProvider` object provides an API + to upload dashboards to a Grafana charm. In its most streamlined + usage, the :class:`GrafanaDashboardProvider` is integrated in a + charmed operator as follows: + + self.grafana = GrafanaDashboardProvider(self) + + The :class:`GrafanaDashboardProvider` will look for dashboard + templates in the `/grafana_dashboards` folder. + Additionally, dashboard templates can be uploaded programmatically + via the :method:`GrafanaDashboardProvider.add_dashboard` method. 
+ + To use the :class:`GrafanaDashboardProvider` API, you need a relation + defined in your charm operator's metadata.yaml as follows: + + provides: + grafana-dashboard: + interface: grafana_dashboard + + If you would like to use relation name other than `grafana-dashboard`, + you will need to specify the relation name via the `relation_name` + argument when instantiating the :class:`GrafanaDashboardProvider` object. + However, it is strongly advised to keep the default relation name, + so that people deploying your charm will have a consistent experience + with all other charms that provide Grafana dashboards. + + It is possible to provide a different file path for the Grafana dashboards + to be automatically managed by the :class:`GrafanaDashboardProvider` object + via the `dashboards_path` argument. This may be necessary when the directory + structure of your charmed operator repository is not the "usual" one as + generated by `charmcraft init`, for example when adding the charmed operator + in a Java repository managed by Maven or Gradle. However, unless there are + such constraints with other tooling, it is strongly advised to store the + Grafana dashboards in the default `/grafana_dashboards` + folder, in order to provide a consistent experience for other charmed operator + authors. + + Args: + charm: a :class:`CharmBase` object which manages this + :class:`GrafanaProvider` object. Generally this is + `self` in the instantiating class. + relation_name: a :string: name of the relation managed by this + :class:`GrafanaDashboardProvider`; it defaults to "grafana-dashboard". + dashboards_path: a filesystem path relative to the charm root + where dashboard templates can be located. By default, the library + expects dashboard files to be in the `/grafana_dashboards` + directory. 
+ """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + try: + dashboards_path = _resolve_dir_against_charm_path(charm, dashboards_path) + except InvalidDirectoryPathError as e: + logger.warning( + "Invalid Grafana dashboards folder at %s: %s", + e.grafana_dashboards_absolute_path, + e.message, + ) + + super().__init__(charm, relation_name) + + self._charm = charm + self._relation_name = relation_name + self._dashboards_path = dashboards_path + + # No peer relation bucket we can rely on providers, keep StoredState here, too + self._stored.set_default(dashboard_templates={}) # type: ignore + + self.framework.observe(self._charm.on.leader_elected, self._update_all_dashboards_from_dir) + self.framework.observe(self._charm.on.upgrade_charm, self._update_all_dashboards_from_dir) + self.framework.observe(self._charm.on.config_changed, self._update_all_dashboards_from_dir) + + self.framework.observe( + self._charm.on[self._relation_name].relation_created, + self._on_grafana_dashboard_relation_created, + ) + self.framework.observe( + self._charm.on[self._relation_name].relation_changed, + self._on_grafana_dashboard_relation_changed, + ) + + def add_dashboard(self, content: str, inject_dropdowns: bool = True) -> None: + """Add a dashboard to the relation managed by this :class:`GrafanaDashboardProvider`. + + Args: + content: a string representing a Jinja template. Currently, no + global variables are added to the Jinja template evaluation + context. + inject_dropdowns: a :boolean: indicating whether topology dropdowns should be + added to the dashboard + """ + # Update of storage must be done irrespective of leadership, so + # that the stored state is there when this unit becomes leader. 
+ stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore + + encoded_dashboard = LZMABase64.compress(content) + + # Use as id the first chars of the encoded dashboard, so that + # it is predictable across units. + id = "prog:{}".format(encoded_dashboard[-24:-16]) + + stored_dashboard_templates[id] = CharmedDashboard._content_to_dashboard_object( + charm_name=self._charm.meta.name, + content=encoded_dashboard, + dashboard_alt_uid=CharmedDashboard._generate_alt_uid(self._charm.meta.name, id), + inject_dropdowns=inject_dropdowns, + juju_topology=self._juju_topology, + ) + + if self._charm.unit.is_leader(): + for dashboard_relation in self._charm.model.relations[self._relation_name]: + self._upset_dashboards_on_relation(dashboard_relation) + + def remove_non_builtin_dashboards(self) -> None: + """Remove all dashboards to the relation added via :method:`add_dashboard`.""" + # Update of storage must be done irrespective of leadership, so + # that the stored state is there when this unit becomes leader. 
+ stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore + + for dashboard_id in list(stored_dashboard_templates.keys()): + if dashboard_id.startswith("prog:"): + del stored_dashboard_templates[dashboard_id] + self._stored.dashboard_templates = stored_dashboard_templates + + if self._charm.unit.is_leader(): + for dashboard_relation in self._charm.model.relations[self._relation_name]: + self._upset_dashboards_on_relation(dashboard_relation) + + def update_dashboards(self) -> None: + """Trigger the re-evaluation of the data on all relations.""" + if self._charm.unit.is_leader(): + for dashboard_relation in self._charm.model.relations[self._relation_name]: + self._upset_dashboards_on_relation(dashboard_relation) + + def reload_dashboards(self, inject_dropdowns: bool = True) -> None: + """Reloads dashboards and updates all relations.""" + self._update_all_dashboards_from_dir(inject_dropdowns=inject_dropdowns) + + def _update_all_dashboards_from_dir( + self, _: Optional[HookEvent] = None, inject_dropdowns: bool = True + ) -> None: + """Scans the built-in dashboards and updates relations with changes.""" + # Update of storage must be done irrespective of leadership, so + # that the stored state is there when this unit becomes leader. + + # Ensure we do not leave outdated dashboards by removing from stored all + # the encoded dashboards that start with "file/". 
+ if self._dashboards_path: + stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore + + for dashboard_id in list(stored_dashboard_templates.keys()): + if dashboard_id.startswith("file:"): + del stored_dashboard_templates[dashboard_id] + + stored_dashboard_templates.update( + CharmedDashboard.load_dashboards_from_dir( + dashboards_path=Path(self._dashboards_path), + charm_name=self._charm.meta.name, + charm_dir=self._charm.charm_dir, + inject_dropdowns=inject_dropdowns, + juju_topology=self._juju_topology, + ) + ) + + if self._charm.unit.is_leader(): + for dashboard_relation in self._charm.model.relations[self._relation_name]: + self._upset_dashboards_on_relation(dashboard_relation) + + def _reinitialize_dashboard_data(self, inject_dropdowns: bool = True) -> None: + """Triggers a reload of dashboard outside an eventing workflow. + + Args: + inject_dropdowns: a :bool: used to indicate whether topology dropdowns should be added + + This will destroy any existing relation data. 
+ """ + try: + _resolve_dir_against_charm_path(self._charm, self._dashboards_path) + self._update_all_dashboards_from_dir(inject_dropdowns=inject_dropdowns) + + except InvalidDirectoryPathError as e: + logger.warning( + "Invalid Grafana dashboards folder at %s: %s", + e.grafana_dashboards_absolute_path, + e.message, + ) + stored_dashboard_templates: Any = self._stored.dashboard_templates # pyright: ignore + + for dashboard_id in list(stored_dashboard_templates.keys()): + if dashboard_id.startswith("file:"): + del stored_dashboard_templates[dashboard_id] + self._stored.dashboard_templates = stored_dashboard_templates + + # With all the file-based dashboards cleared out, force a refresh + # of relation data + if self._charm.unit.is_leader(): + for dashboard_relation in self._charm.model.relations[self._relation_name]: + self._upset_dashboards_on_relation(dashboard_relation) + + def _on_grafana_dashboard_relation_created(self, event: RelationCreatedEvent) -> None: + """Watch for a relation being created and automatically send dashboards. + + Args: + event: The :class:`RelationJoinedEvent` sent when a + `grafana_dashboaard` relationship is joined + """ + if self._charm.unit.is_leader(): + self._update_all_dashboards_from_dir() + self._upset_dashboards_on_relation(event.relation) + + def _on_grafana_dashboard_relation_changed(self, event: RelationChangedEvent) -> None: + """Watch for changes so we know if there's an error to signal back to the parent charm. + + Args: + event: The `RelationChangedEvent` that triggered this handler. 
+ """ + if self._charm.unit.is_leader(): + data = json.loads(event.relation.data[event.app].get("event", "{}")) # type: ignore + + if not data: + return + + valid = bool(data.get("valid", True)) + errors = data.get("errors", []) + if valid and not errors: + self.on.dashboard_status_changed.emit(valid=valid) # pyright: ignore + else: + self.on.dashboard_status_changed.emit( # pyright: ignore + valid=valid, errors=errors + ) + + def _upset_dashboards_on_relation(self, relation: Relation) -> None: + """Update the dashboards in the relation data bucket.""" + # It's completely ridiculous to add a UUID, but if we don't have some + # pseudo-random value, this never makes it across 'juju set-state' + stored_data = { + "templates": type_convert_stored(self._stored.dashboard_templates), # pyright: ignore + "uuid": str(uuid.uuid4()), + } + + relation.data[self._charm.app]["dashboards"] = json.dumps(stored_data) + + @property + def _juju_topology(self) -> Dict: + return { + "model": self._charm.model.name, + "model_uuid": self._charm.model.uuid, + "application": self._charm.app.name, + "unit": self._charm.unit.name, + } + + @property + def dashboard_templates(self) -> List: + """Return a list of the known dashboard templates.""" + return list(self._stored.dashboard_templates.values()) # type: ignore + + +class GrafanaDashboardConsumer(Object): + """A consumer object for working with Grafana Dashboards.""" + + on = GrafanaDashboardEvents() # pyright: ignore + _stored = StoredState() + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + ) -> None: + """API to receive Grafana dashboards from charmed operators. + + The :class:`GrafanaDashboardConsumer` object provides an API + to consume dashboards provided by a charmed operator using the + :class:`GrafanaDashboardProvider` library. 
The + :class:`GrafanaDashboardConsumer` is integrated in a + charmed operator as follows: + + self.grafana = GrafanaDashboardConsumer(self) + + To use this library, you need a relation defined as follows in + your charm operator's metadata.yaml: + + requires: + grafana-dashboard: + interface: grafana_dashboard + + If you would like to use a different relation name than + `grafana-dashboard`, you need to specify the relation name via the + `relation_name` argument. However, it is strongly advised not to + change the default, so that people deploying your charm will have + a consistent experience with all other charms that consume Grafana + dashboards. + + Args: + charm: a :class:`CharmBase` object which manages this + :class:`GrafanaProvider` object. Generally this is + `self` in the instantiating class. + relation_name: a :string: name of the relation managed by this + :class:`GrafanaDashboardConsumer`; it defaults to "grafana-dashboard". + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._transformer = CosTool(self._charm) + + self._stored.set_default(dashboards={}) # type: ignore + + self.framework.observe( + self._charm.on[self._relation_name].relation_changed, + self._on_grafana_dashboard_relation_changed, + ) + self.framework.observe( + self._charm.on[self._relation_name].relation_broken, + self._on_grafana_dashboard_relation_broken, + ) + self.framework.observe( + self._charm.on[DEFAULT_PEER_NAME].relation_changed, + self._on_grafana_peer_changed, + ) + + def get_dashboards_from_relation(self, relation_id: int) -> List: + """Get a list of known dashboards for one instance of the monitored relation. + + Args: + relation_id: the identifier of the relation instance, as returned by + :method:`ops.model.Relation.id`. 
+ + Returns: a list of known dashboards coming from the provided relation instance. + """ + return [ + self._to_external_object(relation_id, dashboard) + for dashboard in self._get_stored_dashboards(relation_id) + ] + + def _on_grafana_dashboard_relation_changed(self, event: RelationChangedEvent) -> None: + """Handle relation changes in related providers. + + If there are changes in relations between Grafana dashboard consumers + and providers, this event handler (if the unit is the leader) will + get data for an incoming grafana-dashboard relation through a + :class:`GrafanaDashboardsChanged` event, and make the relation data + available in the app's datastore object. The Grafana charm can + then respond to the event to update its configuration. + """ + changes = False + if self._charm.unit.is_leader(): + changes = self._render_dashboards_and_signal_changed(event.relation) + + if changes: + self.on.dashboards_changed.emit() # pyright: ignore + + def _on_grafana_peer_changed(self, _: RelationChangedEvent) -> None: + """Emit dashboard events on peer events so secondary charm data updates.""" + if self._charm.unit.is_leader(): + return + self.on.dashboards_changed.emit() # pyright: ignore + + def update_dashboards(self, relation: Optional[Relation] = None) -> None: + """Re-establish dashboards on one or more relations. + + If something changes between this library and a datasource, try to re-establish + invalid dashboards and invalidate active ones. + + Args: + relation: a specific relation for which the dashboards have to be + updated. If not specified, all relations managed by this + :class:`GrafanaDashboardConsumer` will be updated. 
+ """ + if self._charm.unit.is_leader(): + relations = ( + [relation] if relation else self._charm.model.relations[self._relation_name] + ) + + for relation in relations: + self._render_dashboards_and_signal_changed(relation) + + def _on_grafana_dashboard_relation_broken(self, event: RelationBrokenEvent) -> None: + """Update job config when providers depart. + + When a Grafana dashboard provider departs, the configuration + for that provider is removed from the list of dashboards + """ + if not self._charm.unit.is_leader(): + return + + self._remove_all_dashboards_for_relation(event.relation) + + def _render_dashboards_and_signal_changed(self, relation: Relation) -> bool: # type: ignore + """Validate a given dashboard. + + Verify that the passed dashboard data is able to be found in our list + of datasources and will render. If they do, let the charm know by + emitting an event. + + Args: + relation: Relation; The relation the dashboard is associated with. + + Returns: + a boolean indicating whether an event should be emitted + """ + other_app = relation.app + + raw_data = relation.data[other_app].get("dashboards", "") # pyright: ignore + + if not raw_data: + logger.warning( + "No dashboard data found in the %s:%s relation", + self._relation_name, + str(relation.id), + ) + return False + + data = json.loads(raw_data) + + # The only piece of data needed on this side of the relations is "templates" + templates = data.pop("templates") + + # The dashboards are WAY too big since this ultimately calls out to Juju to + # set the relation data, and it overflows the maximum argument length for + # subprocess, so we have to use b64, annoyingly. + # Worse, Python3 expects absolutely everything to be a byte, and a plain + # `base64.b64encode()` is still too large, so we have to go through hoops + # of encoding to byte, compressing with lzma, converting to base64 so it + # can be converted to JSON, then all the way back. 
+ + rendered_dashboards = [] + relation_has_invalid_dashboards = False + + for _, (fname, template) in enumerate(templates.items()): + content = None + error = None + topology = template.get("juju_topology", {}) + try: + content = LZMABase64.decompress(template["content"]) + inject_dropdowns = template.get("inject_dropdowns", True) + content = self._manage_dashboard_uid(content, template) + content = CharmedDashboard._convert_dashboard_fields(content, inject_dropdowns) + + if topology: + content = CharmedDashboard._inject_labels(content, topology, self._transformer) + + content = LZMABase64.compress(content) + except lzma.LZMAError as e: + error = str(e) + relation_has_invalid_dashboards = True + except json.JSONDecodeError as e: + error = str(e.msg) + logger.warning("Invalid JSON in Grafana dashboard '{}': {}".format(fname, error)) + continue + + # Prepend the relation name and ID to the dashboard ID to avoid clashes with + # multiple relations with apps from the same charm, or having dashboards with + # the same ids inside their charm operators + rendered_dashboards.append( + { + "id": "{}:{}/{}".format(relation.name, relation.id, fname), + "original_id": fname, + "content": content if content else None, + "template": template, + "valid": (error is None), + "error": error, + } + ) + + if relation_has_invalid_dashboards: + self._remove_all_dashboards_for_relation(relation) + + invalid_templates = [ + data["original_id"] for data in rendered_dashboards if not data["valid"] + ] + + logger.warning( + "Cannot add one or more Grafana dashboards from relation '{}:{}': the following " + "templates are invalid: {}".format( + relation.name, + relation.id, + invalid_templates, + ) + ) + + relation.data[self._charm.app]["event"] = json.dumps( + { + "errors": [ + { + "dashboard_id": rendered_dashboard["original_id"], + "error": rendered_dashboard["error"], + } + for rendered_dashboard in rendered_dashboards + if rendered_dashboard["error"] + ] + } + ) + + # Dropping 
def _to_external_object(self, relation_id, dashboard):
    """Convert a stored dashboard entry into the dict exposed to the charm.

    Args:
        relation_id: identifier of the relation the dashboard came from; it is
            passed through unchanged as ``relation_id`` in the result.
        dashboard: a stored dashboard entry with at least the keys ``content``
            (LZMA+base64 compressed dashboard JSON), ``original_id`` and
            ``template`` (which must contain ``charm``).

    Returns:
        a dict with the keys ``id``, ``relation_id``, ``charm``, ``content``
        (the decompressed JSON string), ``dashboard_uid``, ``dashboard_version``
        and ``dashboard_title``.
    """
    decompressed = LZMABase64.decompress(dashboard["content"])
    as_dict = json.loads(decompressed)

    dashboard_title = as_dict.get("title", "")
    dashboard_uid = as_dict.get("uid", "")

    try:
        dashboard_version = int(as_dict["version"])
    except (KeyError, TypeError, ValueError):
        # KeyError: no '.version' at all; ValueError: a non-numeric string;
        # TypeError: JSON null / list / object, which int() rejects outright.
        # All of them are "missing or invalid", so all get the same fallback.
        logger.warning(
            "Dashboard '%s' (uid '%s') is missing a '.version' field or is invalid "
            "(must be integer); using '0' as fallback",
            dashboard_title,
            dashboard_uid,
        )
        dashboard_version = 0

    return {
        "id": dashboard["original_id"],
        "relation_id": relation_id,
        "charm": dashboard["template"]["charm"],
        "content": decompressed,
        "dashboard_uid": dashboard_uid,
        "dashboard_version": dashboard_version,
        "dashboard_title": dashboard_title,
    }
+ logger.error("dashboard '%s' from relation id '%s' is missing a '.uid' field; omitted", obj["dashboard_title"], obj["relation_id"]) + continue + + if key in d: + d[key] = max(d[key], obj, key=lambda o: (o["dashboard_version"], o["relation_id"], o["content"])) + logger.warning("deduplicate dashboard '%s' (uid '%s') - kept version '%s' from relation id '%s'", d[key]["dashboard_title"], d[key]["dashboard_uid"], d[key]["dashboard_version"], d[key]["relation_id"]) + else: + d[key] = obj + + return list(d.values()) + + def _get_stored_dashboards(self, relation_id: int) -> list: + """Pull stored dashboards out of the peer data bucket.""" + return self.get_peer_data("dashboards").get(str(relation_id), {}) + + def _set_default_data(self) -> None: + """Set defaults if they are not in peer relation data.""" + data = {"dashboards": {}} # type: ignore + for k, v in data.items(): + if not self.get_peer_data(k): + self.set_peer_data(k, v) + + def set_peer_data(self, key: str, data: Any) -> None: + """Put information into the peer data bucket instead of `StoredState`.""" + peers = self._charm.peers # type: ignore[attr-defined] + if not peers or not peers.data: + logger.info("set_peer_data: no peer relation. Is the charm being installed/removed?") + return + peers.data[self._charm.app][key] = json.dumps(data) # type: ignore[attr-defined] + + def get_peer_data(self, key: str) -> Any: + """Retrieve information from the peer data bucket instead of `StoredState`.""" + peers = self._charm.peers # type: ignore[attr-defined] + if not peers or not peers.data: + logger.warning( + "get_peer_data: no peer relation. Is the charm being installed/removed?" + ) + return {} + data = peers.data[self._charm.app].get(key, "") + return json.loads(data) if data else {} + + +class GrafanaDashboardAggregator(Object): + """API to retrieve Grafana dashboards from machine dashboards. 
+ + The :class:`GrafanaDashboardAggregator` object provides a way to + collate and aggregate Grafana dashboards from reactive/machine charms + and transport them into Charmed Operators, using Juju topology. + For detailed usage instructions, see the documentation for + :module:`cos-proxy-operator`, as this class is intended for use as a + single point of intersection rather than use in individual charms. + + Since :class:`GrafanaDashboardAggregator` serves as a bridge between + Canonical Observability Stack Charmed Operators and Reactive Charms, + deployed in a Reactive Juju model, both a target relation which is + used to collect events from Reactive charms and a `grafana_relation` + which is used to send the collected data back to the Canonical + Observability Stack are required. + + In its most streamlined usage, :class:`GrafanaDashboardAggregator` is + integrated in a charmed operator as follows: + self.grafana = GrafanaDashboardAggregator(self) + + Args: + charm: a :class:`CharmBase` object which manages this + :class:`GrafanaProvider` object. Generally this is + `self` in the instantiating class. + target_relation: a :string: name of a relation managed by this + :class:`GrafanaDashboardAggregator`, which is used to communicate + with reactive/machine charms it defaults to "dashboards". + grafana_relation: a :string: name of a relation used by this + :class:`GrafanaDashboardAggregator`, which is used to communicate + with charmed grafana. It defaults to "downstream-grafana-dashboard" + """ + + _stored = StoredState() + on = GrafanaProviderEvents() # pyright: ignore + + def __init__( + self, + charm: CharmBase, + target_relation: str = "dashboards", + grafana_relation: str = "downstream-grafana-dashboard", + ): + super().__init__(charm, grafana_relation) + + # Reactive charms may be RPC-ish and not leave reliable data around. 
Keep + # StoredState here + self._stored.set_default( # type: ignore + dashboard_templates={}, + id_mappings={}, + ) + + self._charm = charm + self._target_relation = target_relation + self._grafana_relation = grafana_relation + + self.framework.observe( + self._charm.on[self._grafana_relation].relation_joined, + self._update_remote_grafana, + ) + self.framework.observe( + self._charm.on[self._grafana_relation].relation_changed, + self._update_remote_grafana, + ) + self.framework.observe( + self._charm.on[self._target_relation].relation_changed, + self.update_dashboards, + ) + self.framework.observe( + self._charm.on[self._target_relation].relation_broken, + self.remove_dashboards, + ) + + def update_dashboards(self, event: RelationEvent) -> None: + """If we get a dashboard from a reactive charm, parse it out and update.""" + if self._charm.unit.is_leader(): + self._upset_dashboards_on_event(event) + + def _upset_dashboards_on_event(self, event: RelationEvent) -> None: + """Update the dashboards in the relation data bucket.""" + dashboards = self._handle_reactive_dashboards(event) + + if not dashboards: + logger.warning( + "Could not find dashboard data after a relation change for {}".format(event.app) + ) + return + + for id in dashboards: + self._stored.dashboard_templates[id] = CharmedDashboard._content_to_dashboard_object( # type: ignore + charm_name=event.app.name, + content=dashboards[id], + inject_dropdowns=True, + juju_topology=self._hybrid_topology(event), + ) + + self._stored.id_mappings[event.app.name] = dashboards # type: ignore + self._update_remote_grafana(event) + + def _update_remote_grafana(self, _: Optional[RelationEvent] = None) -> None: + """Push dashboards to the downstream Grafana relation.""" + # It's still ridiculous to add a UUID here, but needed + stored_data = { + "templates": type_convert_stored(self._stored.dashboard_templates), # pyright: ignore + "uuid": str(uuid.uuid4()), + } + + if self._charm.unit.is_leader(): + for 
grafana_relation in self.model.relations[self._grafana_relation]: + grafana_relation.data[self._charm.app]["dashboards"] = json.dumps(stored_data) + + def remove_dashboards(self, event: RelationBrokenEvent) -> None: + """Remove a dashboard if the relation is broken.""" + app_ids = type_convert_stored(self._stored.id_mappings.get(event.app.name, "")) # type: ignore + + if not app_ids: + logger.info("Could not look up stored dashboards for %s", event.app.name) # type: ignore + return + + del self._stored.id_mappings[event.app.name] # type: ignore + for id in app_ids: + del self._stored.dashboard_templates[id] # type: ignore + + stored_data = { + "templates": type_convert_stored(self._stored.dashboard_templates), # pyright: ignore + "uuid": str(uuid.uuid4()), + } + + if self._charm.unit.is_leader(): + for grafana_relation in self.model.relations[self._grafana_relation]: + grafana_relation.data[self._charm.app]["dashboards"] = json.dumps(stored_data) + + # Yes, this has a fair amount of branching. It's not that complex, though + def _strip_existing_datasources(self, dash: dict) -> dict: # noqa: C901 + """Remove existing reactive charm datasource templating out. + + This method iterates through *known* places where reactive charms may set + data in contributed dashboards and removes them. + + `dashboard["__inputs"]` is a property sometimes set when exporting dashboards from + the Grafana UI. It is not present in earlier Grafana versions, and can be disabled + in 5.3.4 and above (optionally). If set, any values present will be substituted on + import. Some reactive charms use this for Prometheus. COS uses dropdown selectors + for datasources, and leaving this present results in "default" datasource values + which are broken. + + Similarly, `dashboard["templating"]["list"][N]["name"] == "host"` can be used to + set a `host` variable for use in dashboards which is not meaningful in the context + of Juju topology and will yield broken dashboards. 
+ + Further properties may be discovered. + """ + try: + if "list" in dash["templating"]: + for i in range(len(dash["templating"]["list"])): + if ( + "datasource" in dash["templating"]["list"][i] + and dash["templating"]["list"][i]["datasource"] is not None + ): + if "Juju" in dash["templating"]["list"][i].get("datasource", ""): + dash["templating"]["list"][i]["datasource"] = r"${prometheusds}" + + # Strip out newly-added 'juju_application' template variables which + # don't line up with our drop-downs + dash_mutable = dash + for i in range(len(dash["templating"]["list"])): + if ( + "name" in dash["templating"]["list"][i] + and dash["templating"]["list"][i].get("name", "") == "app" + ): + del dash_mutable["templating"]["list"][i] + + if dash_mutable: + dash = dash_mutable + except KeyError: + logger.debug("No existing templating data in dashboard") + + if "__inputs" in dash: + inputs = dash + for i in range(len(dash["__inputs"])): + if dash["__inputs"][i].get("pluginName", "") == "Prometheus": + del inputs["__inputs"][i] + if inputs: + dash["__inputs"] = inputs["__inputs"] + else: + del dash["__inputs"] + + return dash + + def _handle_reactive_dashboards(self, event: RelationEvent) -> Optional[Dict]: + """Look for a dashboard in relation data (during a reactive hook) or builtin by name.""" + if not self._charm.unit.is_leader(): + return {} + + templates = [] + id = "" + + # Reactive data can reliably be pulled out of events. 
In theory, if we got an event, + # it's on the bucket, but using event explicitly keeps the mental model in + # place for reactive + for k in event.relation.data[event.unit].keys(): # type: ignore + if k.startswith("request_"): + templates.append(json.loads(event.relation.data[event.unit][k])["dashboard"]) # type: ignore + + for k in event.relation.data[event.app].keys(): # type: ignore + if k.startswith("request_"): + templates.append(json.loads(event.relation.data[event.app][k])["dashboard"]) # type: ignore + + builtins = self._maybe_get_builtin_dashboards(event) + + if not templates and not builtins: + logger.warning("NOTHING!") + return {} + + dashboards = {} + for t in templates: + # This seems ridiculous, too, but to get it from a "dashboards" key in serialized JSON + # in the bucket back out to the actual "dashboard" we _need_, this is the way + # This is not a mistake -- there's a double nesting in reactive charms, and + # Grafana won't load it. We have to unbox: + # event.relation.data[event.]["request_*"]["dashboard"]["dashboard"], + # and the final unboxing is below. 
+ # + # Apparently SOME newer dashboards (such as Ceph) do not have this double nesting, so + # now we get to account for both :toot: + dash = t.get("dashboard", {}) or t + + # Replace values with LMA-style templating + dash = self._strip_existing_datasources(dash) + dash = json.dumps(dash) + + # Replace the old-style datasource templates + dash = re.sub(r"<< datasource >>", r"${prometheusds}", dash) + dash = re.sub(r'"datasource": "prom.*?"', r'"datasource": "${prometheusds}"', dash) + dash = re.sub( + r'"datasource": "\$datasource"', r'"datasource": "${prometheusds}"', dash + ) + dash = re.sub(r'"uid": "\$datasource"', r'"uid": "${prometheusds}"', dash) + dash = re.sub( + r'"datasource": "(!?\w)[\w|\s|-]+?Juju generated.*?"', + r'"datasource": "${prometheusds}"', + dash, + ) + + # Yank out "new"+old LMA topology + dash = re.sub( + r'(,?\s?juju_application=~)\\"\$app\\"', r'\1\\"$juju_application\\"', dash + ) + + # Replace old piechart panels + dash = re.sub(r'"type": "grafana-piechart-panel"', '"type": "piechart"', dash) + + from jinja2 import DebugUndefined, Template + + content = LZMABase64.compress( + Template(dash, undefined=DebugUndefined).render(datasource=r"${prometheusds}") # type: ignore + ) + id = "prog:{}".format(content[-24:-16]) + + dashboards[id] = content + return {**builtins, **dashboards} + + def _maybe_get_builtin_dashboards(self, event: RelationEvent) -> Dict: + """Tries to match the event with an included dashboard. + + Scans dashboards packed with the charm instantiating this class, and tries to match + one with the event. There is no guarantee that any given event will match a builtin, + since each charm instantiating this class may include a different set of dashboards, + or none. 
class CosTool:
    """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.

    The binary is looked up lazily (see :attr:`path`). When it cannot be found,
    every public method degrades to a no-op: expressions and rules are returned
    unchanged and validation reports success.
    """

    # Resolved path of the cos-tool binary; None until the first lookup.
    _path = None
    # Set once a lookup has failed, so the filesystem is not probed again.
    _disabled = False

    def __init__(self, charm):
        self._charm = charm

    @property
    def path(self):
        """Lazy lookup of the path of cos-tool.

        Returns:
            the resolved path of the binary, or None when it is not available.
        """
        if self._disabled:
            return None
        if not self._path:
            self._path = self._get_tool_path()
            if not self._path:
                logger.debug("Skipping injection of juju topology as label matchers")
                self._disabled = True
        return self._path

    def apply_label_matchers(self, rules: dict, type: str) -> dict:
        """Will apply label matchers to the expression of all alerts in all supplied groups.

        Args:
            rules: a dict with a "groups" list; each group may have a "rules" list.
                Rule expressions are rewritten in place.
            type: the value passed to cos-tool's ``--format`` flag.

        Returns:
            the same ``rules`` object.
        """
        if not self.path:
            return rules
        for group in rules["groups"]:
            rules_in_group = group.get("rules", [])
            for rule in rules_in_group:
                topology = {}
                # A rule is not required to carry labels; treat a missing
                # "labels" key as "no topology" instead of raising KeyError.
                labels = rule.get("labels", {})
                # if the user for some reason has provided juju_unit, we'll need to honor it
                # in most cases, however, this will be empty
                for label in [
                    "juju_model",
                    "juju_model_uuid",
                    "juju_application",
                    "juju_charm",
                    "juju_unit",
                ]:
                    if label in labels:
                        topology[label] = labels[label]

                rule["expr"] = self.inject_label_matchers(rule["expr"], topology, type)
        return rules

    def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]:
        """Will validate correctness of alert rules, returning a boolean and any errors.

        Args:
            rules: a dict with a "groups" list.

        Returns:
            ``(True, "")`` when the rules validate or cos-tool is unavailable;
            otherwise ``(False, errors)`` where ``errors`` is the comma-joined
            "error validating" lines from cos-tool's captured output.
        """
        if not self.path:
            logger.debug("`cos-tool` unavailable. Not validating alert correctness.")
            return True, ""

        with tempfile.TemporaryDirectory() as tmpdir:
            rule_path = Path(tmpdir) / "validate_rule.yaml"

            # Smash "our" rules format into what upstream actually uses, which is more like:
            #
            # groups:
            #   - name: foo
            #     rules:
            #       - alert: SomeAlert
            #         expr: up
            #       - alert: OtherAlert
            #         expr: up
            transformed_rules = {"groups": []}  # type: ignore
            for group in rules["groups"]:
                transformed = {"name": str(uuid.uuid4()), "rules": [group]}
                transformed_rules["groups"].append(transformed)

            rule_path.write_text(yaml.dump(transformed_rules))

            args = [str(self.path), "validate", str(rule_path)]
            # noinspection PyBroadException
            try:
                self._exec(args)
                return True, ""
            except subprocess.CalledProcessError as e:
                logger.debug("Validating the rules failed: %s", e.output)
                return False, self._validation_errors(e.output)

    @staticmethod
    def _validation_errors(output) -> str:
        """Return the "error validating" lines of cos-tool output, comma-joined.

        ``_exec`` captures stdout as bytes, so the output must be decoded and
        split into lines first; iterating bytes directly yields integers.
        """
        if not output:
            return ""
        if isinstance(output, bytes):
            output = output.decode("utf-8", errors="replace")
        return ", ".join(line for line in output.splitlines() if "error validating" in line)

    def inject_label_matchers(self, expression: str, topology: dict, type: str) -> str:
        """Add label matchers to an expression.

        Args:
            expression: the query expression to transform.
            topology: mapping whose keys become label matchers of the form
                ``key=~"$key"``; the values are not used.
            type: the value passed to cos-tool's ``--format`` flag.

        Returns:
            the transformed expression, or ``expression`` unchanged when the
            topology is empty, cos-tool is unavailable, or cos-tool fails.
        """
        if not topology:
            return expression
        if not self.path:
            logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression)
            return expression
        args = [str(self.path), "--format", type, "transform"]

        variable_topology = {k: "${}".format(k) for k in topology.keys()}
        args.extend(
            [
                "--label-matcher={}={}".format(key, value)
                for key, value in variable_topology.items()
            ]
        )

        # Pass a leading "--" so expressions with a negation or subtraction aren't interpreted as
        # flags
        args.extend(["--", "{}".format(expression)])
        # noinspection PyBroadException
        try:
            # Turn the equality matchers emitted for the $juju_* variables into regex matchers.
            return re.sub(r'="\$juju', r'=~"$juju', self._exec(args))
        except subprocess.CalledProcessError as e:
            logger.debug('Applying the expression failed: "%s", falling back to the original', e)
            return expression

    def _get_tool_path(self) -> Optional[Path]:
        """Resolve ``cos-tool-<arch>`` relative to the current working directory."""
        arch = platform.machine()
        arch = "amd64" if arch == "x86_64" else arch
        res = "cos-tool-{}".format(arch)
        try:
            path = Path(res).resolve(strict=True)
            return path
        except (FileNotFoundError, OSError):
            logger.debug('Could not locate cos-tool at: "{}"'.format(res))
        return None

    def _exec(self, cmd) -> str:
        """Run ``cmd`` and return its stripped, UTF-8 decoded stdout.

        Raises:
            subprocess.CalledProcessError: when the command exits non-zero; its
                ``output`` attribute holds the captured stdout as bytes.
        """
        result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
        output = result.stdout.decode("utf-8").strip()
        return output
+The provider side of the relation represents the server side, to which logs are being pushed. + +- `LokiPushApiConsumer`: This object is meant to be used by any Charmed Operator that needs to +send log to Loki by implementing the consumer side of the `loki_push_api` relation interface. +For instance, a Promtail or Grafana agent charm which needs to send logs to Loki. + +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. +In order to be able to control the labels on the logs pushed this object adds a Pebble layer +that runs Promtail in the workload container, injecting Juju topology labels into the +logs on the fly. +This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS. + +- `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload +standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the +`loki_push_api` relation interface. +In order to be able to control the labels on the logs pushed this object updates the pebble layer's +"log-targets" section with Juju topology. + +Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju +topology labels are used to uniquely identify the workload which generates telemetry like logs. + + +## LokiPushApiProvider Library Usage + +This object may be used by any Charmed Operator which implements the `loki_push_api` interface. +For instance, Loki or Grafana Agent. + +For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory +and three optional arguments. + +- `charm`: A reference to the parent (Loki) charm. 
+ +- `relation_name`: The name of the relation that the charm uses to interact + with its clients, which implement `LokiPushApiConsumer` `LogForwarder`, or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). + + If provided, this relation name must match a provided relation in metadata.yaml with the + `loki_push_api` interface. + + The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and + "log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated). + + For example, a provider's `metadata.yaml` file may look as follows: + + ```yaml + provides: + logging: + interface: loki_push_api + ``` + + Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as + follows: + + from charms.loki_k8s.v1.loki_push_api import LokiPushApiProvider + from loki_server import LokiServer + ... + + class LokiOperatorCharm(CharmBase): + ... + + def __init__(self, *args): + super().__init__(*args) + ... + external_url = urlparse(self._external_url) + self.loki_provider = LokiPushApiProvider( + self, + port=external_url.port or 80, + scheme=external_url.scheme, + path=f"{external_url.path}/loki/api/v1/push", + ) + ... + + - `port`: Loki Push Api endpoint port. Default value: `3100`. + - `scheme`: Loki Push Api endpoint scheme (`HTTP` or `HTTPS`). Default value: `HTTP` + - `address`: Loki Push Api endpoint address. Default value: `localhost` + - `path`: Loki Push Api endpoint path. Default value: `loki/api/v1/push` + + +The `LokiPushApiProvider` object has several responsibilities: + +1. Set the URL of the Loki Push API in the relation application data bag; the URL + must be unique to all instances (e.g. using a load balancer). + The default URL is the FQDN, but this can be overridden by calling `update_endpoint()`. + +2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use + `LogProxyConsumer` object could download and configure it. + +3. 
Process the metadata of the consumer application, provided via the + "metadata" field of the consumer data bag, which are used to annotate the + alert rules (see next point). An example for "metadata" is the following: + + {'model': 'loki', + 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386', + 'application': 'promtail-k8s' + } + +4. Process alert rules set into the relation by the `LokiPushApiConsumer` + objects, e.g.: + + '{ + "groups": [{ + "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts", + "rules": [{ + "alert": "HighPercentageError", + "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m])) + by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m])) + by (job)\\n > 0.05 + \\n", "for": "10m", + "labels": { + "severity": "page", + "juju_model": "loki", + "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386", + "juju_application": "promtail-k8s" + }, + "annotations": { + "summary": "High request latency" + } + }] + }] + }' + + +Once these alert rules are sent over relation data, the `LokiPushApiProvider` object +stores these files in the directory `/loki/rules` inside the Loki charm container. After +storing alert rules files, the object will check alert rules by querying Loki API +endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups). +If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will +be emitted with details about the `RelationEvent` which triggered it. + +These events should be observed in the charm that uses `LokiPushApiProvider`: + +```python + def __init__(self, *args): + super().__init__(*args) + ... + self.loki_provider = LokiPushApiProvider(self) + self.framework.observe( + self.loki_provider.on.loki_push_api_alert_rules_changed, + self._loki_push_api_alert_rules_changed, + ) +``` + + +## LokiPushApiConsumer Library Usage + +This Loki charm interacts with its clients using the Loki charm library.
Charms +seeking to send logs to Loki must do so using the `LokiPushApiConsumer` object from +this charm library. + +> **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library. +> +> Make sure you run `charmcraft fetch-lib charms.observability_libs.v0.juju_topology` +> when using this library. + +For the simplest use cases, using the `LokiPushApiConsumer` object only requires +instantiating it, typically in the constructor of your charm (the one which +sends logs). + +```python +from charms.loki_k8s.v1.loki_push_api import LokiPushApiConsumer + +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) +``` + +The `LokiPushApiConsumer` constructor requires two things: + +- A reference to the parent (LokiClientCharm) charm. + +- Optionally, the name of the relation that the Loki charm uses to interact + with its clients. If provided, this relation name must match a required + relation in metadata.yaml with the `loki_push_api` interface. + + If not provided, the relation name defaults to `logging`. + +Any time the relation between a Loki provider charm and a Loki consumer charm is +established, a `LokiPushApiEndpointJoined` event is fired. On the consumer side +it is possible to observe this event with: + +```python + +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_joined, + self._on_loki_push_api_endpoint_joined, +) +``` + +Any time there are departures in relations between the consumer charm and Loki +the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance: + +```python +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_departed, + self._on_loki_push_api_endpoint_departed, +) +``` + +The consumer charm can then choose to update its configuration in both situations. + +Note that LokiPushApiConsumer does not add any labels automatically on its own.
In +order to better integrate with the Canonical Observability Stack, you may want to configure your +software to add Juju topology labels. The +[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology +labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how +to do this with promtail. + +## LogProxyConsumer Library Usage + +> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 +> LTS. + +Let's say that we have a workload charm that produces logs, and we need to send those logs to a +workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. + +Adopting this object in a Charmed Operator consists of two steps: + +1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed + operator. There are two ways to get logs into promtail. You can give it a list of files to + read, or you can write to it using the syslog protocol. + + For example: + + ```python + from charms.loki_k8s.v1.loki_push_api import LogProxyConsumer + + ... + + def __init__(self, *args): + ... + self._log_proxy = LogProxyConsumer( + self, + logs_scheme={ + "workload-a": { + "log-files": ["/tmp/workload-a-1.log", "/tmp/workload-a-2.log"], + "syslog-port": 1514, + }, + "workload-b": {"log-files": ["/tmp/workload-b.log"], "syslog-port": 1515}, + }, + relation_name="log-proxy", + ) + self.framework.observe( + self._log_proxy.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(event.message) + self.unit.status = BlockedStatus(event.message) + ``` + + Any time the relation between a provider charm and a LogProxy consumer charm is + established, a `LogProxyEndpointJoined` event is fired.
On the consumer side it is + possible to observe this event with: + + ```python + + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_joined, + self._on_log_proxy_endpoint_joined, + ) + ``` + + Any time there are departures in relations between the consumer charm and the provider, + the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance: + + ```python + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_departed, + self._on_log_proxy_endpoint_departed, + ) + ``` + + The consumer charm can then choose to update its configuration in both situations. + + Note that: + + - You can configure your syslog software using `localhost` as the address and the method + `LogProxyConsumer.syslog_port("container_name")` to get the port, or, alternatively, if you are using rsyslog + you may use the method `LogProxyConsumer.rsyslog_config("container_name")`. + +2. Modify the `metadata.yaml` file to add: + + - The `log-proxy` relation in the `requires` section: + ```yaml + requires: + log-proxy: + interface: loki_push_api + optional: true + ``` + +Once the library is implemented in a Charmed Operator and a relation is established with +the charm that implements the `loki_push_api` interface, the library will inject a +Pebble layer that runs Promtail in the workload container to send logs. + +By default, the promtail binary injected into the container will be downloaded from the internet.
+If, for any reason, the container has limited network access, you may allow charm administrators +to provide their own promtail binary at runtime by adding the following snippet to your charm +metadata: + +```yaml +resources: + promtail-bin: + type: file + description: Promtail binary for logging + filename: promtail-linux +``` + +Which would then allow operators to deploy the charm this way: + +``` +juju deploy \ + ./your_charm.charm \ + --resource promtail-bin=/tmp/promtail-linux-amd64 +``` + +If a different resource name is used, it can be specified with the `promtail_resource_name` +argument to the `LogProxyConsumer` constructor. + +The object can emit a `PromtailDigestError` event when: + +- The Promtail binary cannot be downloaded. +- The sha256 sum of the Promtail binary does not match. + +The object can raise a `ContainerNotFoundError` event: + +- No `container_name` parameter has been specified and the Pod has more than 1 container. + +These can be monitored by observing the `PromtailDigestError` event: + +```python + self.framework.observe( + self._loki_consumer.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(event.message) + self.unit.status = BlockedStatus(event.message) + ) +``` + +## LogForwarder class Usage + +Let's say that we have a charm's workload that writes logs to the standard output (stdout), +and we need to send those logs to a workload implementing the `loki_push_api` interface, +such as `Loki` or `Grafana Agent`. To know how to reach a Loki instance, a charm would +typically use the `loki_push_api` interface. + +Use the `LogForwarder` class by instantiating it in the `__init__` method of the charm: + +```python +from charms.loki_k8s.v1.loki_push_api import LogForwarder + +... + + def __init__(self, *args): + ...
+ self._log_forwarder = LogForwarder( + self, + relation_name="logging" # optional, defaults to `logging` + ) +``` + +The `LogForwarder` by default will observe relation events on the `logging` endpoint and +enable/disable log forwarding automatically. +Next, modify the `metadata.yaml` file to add: + +The `logging` relation in the `requires` section: +```yaml +requires: + logging: + interface: loki_push_api + optional: true +``` + +Once the `LogForwarder` class is implemented in your charm and the relation (implementing the +`loki_push_api` interface) is active and healthy, the library will inject a Pebble layer in +each workload container the charm has access to, to configure Pebble's log forwarding +feature and start sending logs to Loki. + +## Alerting Rules + +This charm library also supports gathering alerting rules from all related Loki client +charms and enabling corresponding alerts within the Loki charm. Alert rules are +automatically gathered by the `LokiPushApiConsumer` object from a directory conventionally +named `loki_alert_rules`. + +This directory must reside at the top level in the `src` folder of the +consumer charm. Each file in this directory is assumed to be a single alert rule +in YAML format. The file name must have one of the following extensions: `.yaml`, `.yml`, `.rule`, or `.rules`. +The format of this alert rule conforms to the +[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules). + +An example of the contents of one such file is shown below. + +```yaml +alert: HighPercentageError +expr: | + sum(rate({%%juju_topology%%} |= "error" [5m])) by (job) + / + sum(rate({%%juju_topology%%}[5m])) by (job) + > 0.05 +for: 10m +labels: + severity: page +annotations: + summary: High request latency + +``` + +It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert +rule shown above.
This filter is a stub that is automatically replaced by the +`LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its +UUID). Such a topology filter is essential to ensure that alert rules submitted by one +provider charm generate alerts only for that same charm. + +The Loki charm may be related to multiple Loki client charms. Without this filter, +rules submitted by one provider charm will also result in corresponding alerts for other +provider charms. Hence, every alert rule expression must include such a topology filter stub. + +Gathering alert rules and generating rule files within the Loki charm is easily done using +the `alerts` property of `LokiPushApiProvider`. Alerts generated by Loki will automatically +include Juju topology labels in the alerts. These labels indicate the source of the alert. + +The following labels are automatically added to every alert: + +- `juju_model` +- `juju_model_uuid` +- `juju_application` + + +If an alert rules file does not contain the keys `alert` or `expr`, or there is no alert +rules file in `alert_rules_path`, a `loki_push_api_alert_rules_error` event is emitted. + +To handle these situations the event must be observed in the `LokiClientCharm` charm.py file: + +```python +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) + + self.framework.observe( + self._loki_consumer.on.loki_push_api_alert_rules_error, + self._alert_rules_error + ) + + def _alert_rules_error(self, event): + self.unit.status = BlockedStatus(event.message) +``` + +## Relation Data + +The Loki charm uses both application and unit relation data to obtain information regarding +Loki Push API and alert rules. + +Units of the consumer charm send their alert rules over app relation data using the `alert_rules` +key.
+ +## Charm logging +The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's +logging module to forward all logs to Loki via the loki-push-api interface. + +```python +from lib.charms.loki_k8s.v0.charm_logging import log_charm +from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer + +@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path") +class MyCharm(...): + _cert_path = "/path/to/cert/on/charm/container.crt" + def __init__(self, ...): + self.logging = LokiPushApiConsumer(...) + self.my_endpoints, self.cert_path = charm_logging_config( + self.logging, self._cert_path) +``` + +Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed. +""" + +import copy +import json +import logging +import os +import platform +import re +import socket +import warnings +from copy import deepcopy +from gzip import GzipFile +from hashlib import sha256 +from io import BytesIO +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union, cast +from urllib import request +from urllib.error import URLError + +import yaml +from cosl import CosTool, JujuTopology +from cosl.rules import AlertRules +from cosl.types import OfficialRuleFileFormat +from ops.charm import ( + CharmBase, + HookEvent, + PebbleReadyEvent, + RelationBrokenEvent, + RelationCreatedEvent, + RelationDepartedEvent, + RelationEvent, + RelationJoinedEvent, + RelationRole, + WorkloadEvent, +) +from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents +from ops.jujuversion import JujuVersion +from ops.model import Container, ModelError, Relation +from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError + +# The unique Charmhub library identifier, never change it +LIBID = "bf76f23cdd03464b877c52bd1d2f563e" + +# Increment this major API version when introducing breaking changes +LIBAPI = 1 + +# Increment this PATCH version 
before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 24 + +PYDEPS = ["cosl"] + +logger = logging.getLogger(__name__) + +RELATION_INTERFACE_NAME = "loki_push_api" +DEFAULT_RELATION_NAME = "logging" +DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules" +DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy" + +PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download" +# To update Promtail version you only need to change the PROMTAIL_VERSION and +# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture +# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. +PROMTAIL_VERSION = "v2.9.7" +PROMTAIL_ARM_BINARY = { + "filename": "promtail-static-arm64", + "zipsha": "c083fdb45e5c794103f974eeb426489b4142438d9e10d0ae272b2aff886e249b", + "binsha": "4cd055c477a301c0bdfdbcea514e6e93f6df5d57425ce10ffc77f3e16fec1ddf", +} + +PROMTAIL_BINARIES = { + "amd64": { + "filename": "promtail-static-amd64", + "zipsha": "6873cbdabf23062aeefed6de5f00ff382710332af3ab90a48c253ea17e08f465", + "binsha": "28da9b99f81296fe297831f3bc9d92aea43b4a92826b8ff04ba433b8cb92fb50", + }, + "arm64": PROMTAIL_ARM_BINARY, + "aarch64": PROMTAIL_ARM_BINARY, +} + +# Paths in `charm` container +BINARY_DIR = "/tmp" + +# Paths in `workload` container +WORKLOAD_BINARY_DIR = "/opt/promtail" +WORKLOAD_CONFIG_DIR = "/etc/promtail" +WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml" +WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME) +WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR) +WORKLOAD_SERVICE_NAME = "promtail" + +# These are the initial port values. As we can have more than one container, +# we use odd and even numbers to avoid collisions. +# Each new container adds 2 to the previous value. 
+HTTP_LISTEN_PORT_START = 9080 # even start port +GRPC_LISTEN_PORT_START = 9095 # odd start port + + +class LokiPushApiError(Exception): + """Base class for errors raised by this module.""" + + +class RelationNotFoundError(LokiPushApiError): + """Raised if there is no relation with the given name.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(LokiPushApiError): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + super().__init__(self.message) + + +class RelationRoleMismatchError(LokiPushApiError): + """Raised if the relation with the given name has a different direction.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Verifies that a relation has the necessary characteristics. 
+ + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. 
+ """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, + expected_relation_interface, + actual_relation_interface, # pyright: ignore + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class InvalidAlertRulePathError(Exception): + """Raised if the alert rules folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + alert_rules_absolute_path: Path, + message: str, + ): + self.alert_rules_absolute_path = alert_rules_absolute_path + self.message = message + + super().__init__(self.message) + + +def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: + """Resolve the provided path items against the directory of the main file. + + Look up the directory of the `main.py` file being executed. This is normally + going to be the charm.py file of the charm including this library. Then, resolve + the provided path elements and, if the result path exists and is a directory, + return its absolute path; otherwise, raise en exception. + + Raises: + InvalidAlertRulePathError, if the path does not exist or is not a directory. 
+ """ + charm_dir = Path(str(charm.charm_dir)) + if not charm_dir.exists() or not charm_dir.is_dir(): + # Operator Framework does not currently expose a robust + # way to determine the top level charm source directory + # that is consistent across deployed charms and unit tests + # Hence for unit tests the current working directory is used + # TODO: updated this logic when the following ticket is resolved + # https://github.com/canonical/operator/issues/643 + charm_dir = Path(os.getcwd()) + + alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) + + if not alerts_dir_path.exists(): + raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") + if not alerts_dir_path.is_dir(): + raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") + + return str(alerts_dir_path) + + +class NoRelationWithInterfaceFoundError(Exception): + """No relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None): + self.charm = charm + self.relation_interface = relation_interface + self.message = ( + "No relations with interface '{}' found in the meta of the '{}' charm".format( + relation_interface, charm.meta.name + ) + ) + + super().__init__(self.message) + + +class MultipleRelationsWithInterfaceFoundError(Exception): + """Multiple relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: str, relations: list): + self.charm = charm + self.relation_interface = relation_interface + self.relations = relations + self.message = ( + "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format( + relation_interface, charm.meta.name + ) + ) + super().__init__(self.message) + + +class LokiPushApiEndpointDeparted(EventBase): + """Event emitted when Loki departed.""" + + +class LokiPushApiEndpointJoined(EventBase): + """Event emitted when Loki joined.""" + + +class 
LokiPushApiAlertRulesChanged(EventBase): + """Event emitted if there is a change in the alert rules.""" + + def __init__(self, handle, relation, relation_id, app=None, unit=None): + """Pretend we are almost like a RelationEvent. + + Fields to serialize: + { + "relation_name": , + "relation_id": , + "app_name": , + "unit_name": + } + + In this way, we can transparently use `RelationEvent.snapshot()` to pass + it back if we need to log it. + """ + super().__init__(handle) + self.relation = relation + self.relation_id = relation_id + self.app = app + self.unit = unit + + def snapshot(self) -> Dict: + """Save event information.""" + if not self.relation: + return {} + snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id} + if self.app: + snapshot["app_name"] = self.app.name + if self.unit: + snapshot["unit_name"] = self.unit.name + return snapshot + + def restore(self, snapshot: dict): + """Restore event information.""" + self.relation = self.framework.model.get_relation( + snapshot["relation_name"], snapshot["relation_id"] + ) + app_name = snapshot.get("app_name") + if app_name: + self.app = self.framework.model.get_app(app_name) + else: + self.app = None + unit_name = snapshot.get("unit_name") + if unit_name: + self.unit = self.framework.model.get_unit(unit_name) + else: + self.unit = None + + +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. 
+ """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class LokiPushApiEvents(ObjectEvents): + """Event descriptor for events raised by `LokiPushApiProvider`.""" + + loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted) + loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined) + loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged) + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + + +class LokiPushApiProvider(Object): + """A LokiPushApiProvider class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm, + relation_name: str = DEFAULT_RELATION_NAME, + *, + port: Union[str, int] = 3100, + scheme: str = "http", + address: str = "", + path: str = "loki/api/v1/push", + ): + """A Loki service provider. + + Args: + charm: a `CharmBase` instance that manages this + instance of the Loki service. + relation_name: an optional string name of the relation between `charm` + and the Loki charmed service. The default is "logging". + It is strongly advised not to change the default, so that people + deploying your charm will have a consistent experience with all + other charms that consume metrics endpoints. + port: an optional port of the Loki service (default is "3100"). + scheme: an optional scheme of the Loki API URL (default is "http"). + address: DEPRECATED. This argument is ignored and will be removed in v2. + It is kept for backward compatibility. + Use `update_endpoint()` instead. 
+ path: an optional path of the Loki API URL (default is "loki/api/v1/push") + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + if address != "": + warnings.warn( + "The 'address' parameter is deprecated and will be removed in v2. " + "Use 'update_endpoint()' instead.", + DeprecationWarning, + stacklevel=2, + ) + + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._tool = CosTool("logql") + self.port = int(port) + self.scheme = scheme + self.path = path + self._custom_url = None + + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + self.framework.observe(events.relation_broken, self._on_logging_relation_broken) + + def _on_lifecycle_event(self, _): + # Upgrade event or other charm-level event + should_update = False + for relation in self._charm.model.relations[self._relation_name]: + # Don't accidentally flip a True result back. 
+ should_update = should_update or self._process_logging_relation_changed(relation) + if should_update: + # We don't have a RelationEvent, so build it up by hand + first_rel = self._charm.model.relations[self._relation_name][0] + self.on.loki_push_api_alert_rules_changed.emit( + relation=first_rel, + relation_id=first_rel.id, + ) + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Set basic data on relation joins. + + Set the promtail binary URL location, which will not change, and anything + else which may be required, but is static.. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must set its relation data. + """ + if self._charm.unit.is_leader(): + event.relation.data[self._charm.app].update(self._promtail_binary_url) + logger.debug("Saved promtail binary url: %s", self._promtail_binary_url) + + def _on_logging_relation_changed(self, event: HookEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + should_update = self._process_logging_relation_changed(event.relation) # pyright: ignore + if should_update: + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, # pyright: ignore + relation_id=event.relation.id, # pyright: ignore + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_broken(self, event: RelationBrokenEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. 
+ """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_departed(self, event: RelationDepartedEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _should_update_alert_rules(self, relation) -> bool: + """Determine whether alert rules should be regenerated. + + If there are alert rules in the relation data bag, tell the charm + whether to regenerate them based on the boolean returned here. + """ + if relation.data.get(relation.app).get("alert_rules", None) is not None: + return True + return False + + def _process_logging_relation_changed(self, relation: Relation) -> bool: + """Handle changes in related consumers. + + Anytime there are changes in relations between Loki + and its consumers charms, Loki set the `loki_push_api` + into the relation data. Set the endpoint building + appropriately, and if there are alert rules present in + the relation, let the caller know. + Besides Loki generates alert rules files based what + consumer charms forwards, + + Args: + relation: the `Relation` instance to update. 
+ + Returns: + A boolean indicating whether an event should be emitted, so we + only emit one on lifecycle events + """ + relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or "" + self.update_endpoint(relation=relation) + return self._should_update_alert_rules(relation) + + @property + def _promtail_binary_url(self) -> dict: + """URL from which Promtail binary can be downloaded.""" + # construct promtail binary url paths from parts + promtail_binaries = {} + for arch, info in PROMTAIL_BINARIES.items(): + info["url"] = "{}/promtail-{}/{}.gz".format( + PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"] + ) + promtail_binaries[arch] = info + + return {"promtail_binary_zip_url": json.dumps(promtail_binaries)} + + def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None: + """Triggers programmatically the update of endpoint in unit relation data. + + This method should be used when the charm relying on this library needs + to update the relation data in response to something occurring outside + the `logging` relation lifecycle, e.g., in case of a + host address change because the charmed operator becomes connected to an + Ingress after the `logging` relation is established. + + To make this library reconciler-friendly, the endpoint URL was made sticky i.e., once the + endpoint is updated with a custom URL, using the public method, it cannot be unset. Users + of this method should set the "url" arg to an internal URL if the charms ingress is no + longer available. + + Args: + url: An optional url value to update relation data. + relation: An optional instance of `class:ops.model.Relation` to update. 
+ """ + # if no relation is specified update all of them + if not relation: + if not self._charm.model.relations.get(self._relation_name): + return + + relations_list = self._charm.model.relations.get(self._relation_name) + else: + relations_list = [relation] + + if url: + self._custom_url = url + + endpoint = self._endpoint(self._custom_url or self._url) + + for relation in relations_list: + relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)}) + + logger.debug("Saved endpoint in unit relation data") + + @property + def _url(self) -> str: + """Get local Loki Push API url. + + Return url to loki, including port number, but without the endpoint subpath. + """ + return f"{self.scheme}://{socket.getfqdn()}:{self.port}" + + def _endpoint(self, url) -> dict: + """Get Loki push API endpoint for a given url. + + Args: + url: A loki unit URL. + + Returns: str + """ + endpoint = "/loki/api/v1/push" + return {"url": url.rstrip("/") + endpoint} + + @property + def alerts(self) -> dict: # noqa: C901 + """Fetch alerts for all relations. + + A Loki alert rules file consists of a list of "groups". Each + group consists of a list of alerts (`rules`) that are sequentially + executed. This method returns all the alert rules provided by each + related metrics provider charm. These rules may be used to generate a + separate alert rules file for each relation since the returned list + of alert groups are indexed by relation ID. Also for each relation ID + associated scrape metadata such as Juju model, UUID and application + name are provided so a unique name may be generated for the rules + file. 
For each relation the structure of data returned is a dictionary + with four keys + + - groups + - model + - model_uuid + - application + + The value of the `groups` key is such that it may be used to generate + a Loki alert rules file directly using `yaml.dump` but the + `groups` key itself must be included as this is required by Loki, + for example as in `yaml.dump({"groups": alerts["groups"]})`. + + Currently only accepts a list of rules and these + rules are all placed into a single group, even though Loki itself + allows for multiple groups within a single alert rules file. + + Returns: + a dictionary of alert rule groups and associated scrape + metadata indexed by relation ID. + """ + alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units or not relation.app: + continue + + alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) + if not alert_rules: + continue + + alert_rules = self._inject_alert_expr_labels(alert_rules) + + identifier, topology = self._get_identifier_by_alert_rules(alert_rules) + if not topology: + try: + metadata = json.loads(relation.data[relation.app]["metadata"]) + identifier = JujuTopology.from_dict(metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore + + except KeyError as e: + logger.debug( + "Relation %s has no 'metadata': %s", + relation.id, + e, + ) + + if not identifier: + logger.error( + "Alert rules were found but no usable group or identifier was present." 
+ ) + continue + + _, errmsg = self._tool.validate_alert_rules(cast(OfficialRuleFileFormat, alert_rules)) + if errmsg: + if self._charm.unit.is_leader(): + relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) + continue + + alerts[identifier] = alert_rules + + return alerts + + def _get_identifier_by_alert_rules( + self, rules: dict + ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: + """Determine an appropriate dict key for alert rules. + + The key is used as the filename when writing alerts to disk, so the structure + and uniqueness is important. + + Args: + rules: a dict of alert rules + Returns: + A tuple containing an identifier, if found, and a JujuTopology, if it could + be constructed. + """ + if "groups" not in rules: + logger.debug("No alert groups were found in relation data") + return None, None + + # Construct an ID based on what's in the alert rules if they have labels + for group in rules["groups"]: + try: + labels = group["rules"][0]["labels"] + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + return topology.identifier, topology + except KeyError: + logger.debug("Alert rules were found but no usable labels were present") + continue + + logger.warning( + "No labeled alert rules were found, and no 'scrape_metadata' " + "was available. Using the alert group name as filename." + ) + try: + for group in rules["groups"]: + return group["name"], None + except KeyError: + logger.debug("No group name was found to use as identifier") + + return None, None + + def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: + """Iterate through alert rules and inject topology into expressions. 
+ + Args: + rules: a dict of alert rules + """ + if "groups" not in rules: + return rules + + modified_groups = [] + for group in rules["groups"]: + # Copy off rules, so we don't modify an object we're iterating over + rules_copy = group["rules"] + for idx, rule in enumerate(rules_copy): + labels = rule.get("labels") + + if labels: + try: + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + + # Inject topology and put it back in the list + rule["expr"] = self._tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", rule["expr"]), + topology.label_matcher_dict, + ) + except KeyError: + # Some required JujuTopology key is missing. Just move on. + pass + + group["rules"][idx] = rule + + modified_groups.append(group) + + rules["groups"] = modified_groups + return rules + + +class ConsumerBase(Object): + """Consumer's base class.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + skip_alert_topology_labeling: bool = False, + *, + forward_alert_rules: bool = True, + extra_alert_labels: Dict = {}, + ): + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._forward_alert_rules = forward_alert_rules + self._extra_alert_labels = extra_alert_labels + self.topology = JujuTopology.from_charm(charm) + + try: + alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Loki alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + self._alert_rules_path = alert_rules_path + self._skip_alert_topology_labeling = 
skip_alert_topology_labeling + + self._recursive = recursive + + @staticmethod + def _inject_extra_labels_to_alert_rules(rules: Dict, extra_alert_labels: Dict) -> Dict: + """Return a copy of the rules dict with extra labels injected.""" + result = copy.deepcopy(rules) + for group in result.get("groups", []): + for rule in group.get("rules", []): + rule.setdefault("labels", {}).update(extra_alert_labels) + return result + + def _handle_alert_rules(self, relation): + if not self._charm.unit.is_leader(): + return + + alert_rules = ( + AlertRules(query_type="logql") + if self._skip_alert_topology_labeling + else AlertRules(query_type="logql", topology=self.topology) + ) + if self._forward_alert_rules: + alert_rules.add_path(self._alert_rules_path, recursive=self._recursive) + alert_rules_as_dict = alert_rules.as_dict() + + if self._extra_alert_labels: + alert_rules_as_dict = ConsumerBase._inject_extra_labels_to_alert_rules( + alert_rules_as_dict, self._extra_alert_labels + ) + + relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict()) + relation.data[self._charm.app]["alert_rules"] = json.dumps( + alert_rules_as_dict, + sort_keys=True, # sort, to prevent unnecessary relation_changed events + ) + + @property + def loki_endpoints(self) -> List[dict]: + """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data. + + Returns: + A list of unique dictionaries with Loki Push API endpoints, for instance: + [ + {"url": "http://loki1:3100/loki/api/v1/push"}, + {"url": "http://loki2:3100/loki/api/v1/push"}, + ] + """ + endpoints = [] + seen_urls = set() + + for relation in self._charm.model.relations[self._relation_name]: + for unit in relation.units: + if unit.app == self._charm.app: + continue + + if not (endpoint := relation.data[unit].get("endpoint")): + continue + + deserialized_endpoint = json.loads(endpoint) + url = deserialized_endpoint.get("url") + + # Deduplicate by URL. 
+ # With loki-k8s we have ingress-per-unit, so in that case + # we do want to collect the URLs of all the units. + # With loki-coordinator-k8s, even when the coordinator + # is scaled, we want to advertise only one URL. + # Without deduplication, we'd end up with the same + # tls config section in the promtail config file, in which + # case promtail immediately exits with the following error: + # [promtail] level=error ts= msg="error creating promtail" error="failed to create client manager: duplicate client configs are not allowed, found duplicate for name: " + + if not url or url in seen_urls: + continue + + seen_urls.add(url) + endpoints.append(deserialized_endpoint) + + return endpoints + + + +class LokiPushApiConsumer(ConsumerBase): + """Loki Consumer class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + *, + refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None, + forward_alert_rules: bool = True, + extra_alert_labels: Dict = {}, + ): + """Construct a Loki charm client. + + The `LokiPushApiConsumer` object provides configurations to a Loki client charm, such as + the Loki API endpoint to push logs. It is intended for workloads that can speak + loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such + as grafana-agent. + (If you need to forward workload stdout logs, then use LogForwarder; if you need to forward + log files, then use LogProxyConsumer.) + + `LokiPushApiConsumer` can be instantiated as follows: + + self._loki_consumer = LokiPushApiConsumer(self) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + relation_name: the string name of the relation interface to look up. 
+ If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + alert_rules_path: a string indicating a path where alert rules can be found + recursive: Whether to scan for rule files recursively. + skip_alert_topology_labeling: whether to skip the alert topology labeling. + forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules. + extra_alert_labels: Dict of extra labels to inject alert rules with. + refresh_event: an optional bound event or list of bound events which + will be observed to re-set scrape job data (IP address and others) + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + + Emits: + loki_push_api_endpoint_joined: This event is emitted when the relation between the + Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that instantiates `LokiPushApiConsumer` is established. + loki_push_api_endpoint_departed: This event is emitted when the relation between the + Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that implements `LokiPushApiConsumer` is removed. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. 
+ """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + super().__init__( + charm, + relation_name, + alert_rules_path, + recursive, + skip_alert_topology_labeling, + forward_alert_rules=forward_alert_rules, + extra_alert_labels=extra_alert_labels, + ) + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(self._charm.on.config_changed, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + + if refresh_event: + if not isinstance(refresh_event, list): + refresh_event = [refresh_event] + for ev in refresh_event: + self.framework.observe(ev, self._on_lifecycle_event) + + def _on_lifecycle_event(self, _: HookEvent): + """Update require relation data on charm upgrades and other lifecycle events. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + # Upgrade event or other charm-level event + self._reinitialize_alert_rules() + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Handle changes in related consumers. + + Update relation data and emit events when a relation is established. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. 
+ """ + # Alert rules will not change over the lifecycle of a charm, and do not need to be + # constantly set on every relation_changed event. Leave them here. + self._handle_alert_rules(event.relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_changed(self, event: RelationEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + self.on.loki_push_api_endpoint_joined.emit() + + def reload_alerts(self) -> None: + """Reloads alert rules and updates all relations.""" + self._reinitialize_alert_rules() + + def _reinitialize_alert_rules(self): + for relation in self._charm.model.relations[self._relation_name]: + self._handle_alert_rules(relation) + + def _process_logging_relation_changed(self, relation: Relation): + self._handle_alert_rules(relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_departed(self, _: RelationEvent): + """Handle departures in related providers. + + Anytime there are departures in relations between the consumer charm and Loki + the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event. + The consumer charm can then choose to update its configuration. 
+ """ + # Provide default to avoid throwing, as in some complicated scenarios with + # upgrades and hook failures we might not have data in the storage + self.on.loki_push_api_endpoint_departed.emit() + + +class ContainerNotFoundError(Exception): + """Raised if the specified container does not exist.""" + + def __init__(self): + msg = "The specified container does not exist." + self.message = msg + + super().__init__(self.message) + + +class PromtailDigestError(EventBase): + """Event emitted when there is an error with Promtail initialization.""" + + def __init__(self, handle, message): + super().__init__(handle) + self.message = message + + def snapshot(self): + """Save message information.""" + return {"message": self.message} + + def restore(self, snapshot): + """Restore message information.""" + self.message = snapshot["message"] + + +class LogProxyEndpointDeparted(EventBase): + """Event emitted when a Log Proxy has departed.""" + + +class LogProxyEndpointJoined(EventBase): + """Event emitted when a Log Proxy joins.""" + + +class LogProxyEvents(ObjectEvents): + """Event descriptor for events raised by `LogProxyConsumer`.""" + + promtail_digest_error = EventSource(PromtailDigestError) + log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted) + log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined) + + +class LogProxyConsumer(ConsumerBase): + """LogProxyConsumer class. + + > Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju + > 3.6 LTS. + + The `LogProxyConsumer` object provides a method for attaching `promtail` to + a workload in order to generate structured logging data from applications + which traditionally log to syslog or do not have native Loki integration. 
+ The `LogProxyConsumer` can be instantiated as follows: + + self._log_proxy = LogProxyConsumer( + self, + logs_scheme={ + "workload-a": { + "log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"], + "syslog-port": 1514, + }, + "workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515}, + }, + relation_name="log-proxy", + ) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + logs_scheme: a dict which maps containers and a list of log files and syslog port. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + containers_syslog_port: a dict which maps (and enable) containers and syslog port. + alert_rules_path: an optional path for the location of alert rules + files. Defaults to "./src/loki_alert_rules", + resolved from the directory hosting the charm entry file. + The alert rules are automatically updated on charm upgrade. + recursive: Whether to scan for rule files recursively. + promtail_resource_name: An optional promtail resource name from metadata + if it has been modified and attached + insecure_skip_verify: skip SSL verification. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. 
+ """ + + on = LogProxyEvents() # pyright: ignore + + def __init__( + self, + charm, + *, + logs_scheme=None, + relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + promtail_resource_name: Optional[str] = None, + insecure_skip_verify: bool = False, + ): + super().__init__(charm, relation_name, alert_rules_path, recursive) + self._charm = charm + self._logs_scheme = logs_scheme or {} + self._relation_name = relation_name + self.topology = JujuTopology.from_charm(charm) + self._promtail_resource_name = promtail_resource_name or "promtail-bin" + self.insecure_skip_verify = insecure_skip_verify + self._promtails_ports = self._generate_promtails_ports(logs_scheme) + + # architecture used for promtail binary + arch = platform.processor() + if arch in ["x86_64", "amd64"]: + self._arch = "amd64" + elif arch in ["aarch64", "arm64", "armv8b", "armv8l"]: + self._arch = "arm64" + else: + self._arch = arch + + events = self._charm.on[relation_name] + self.framework.observe(events.relation_created, self._on_relation_created) + self.framework.observe(events.relation_changed, self._on_relation_changed) + self.framework.observe(events.relation_departed, self._on_relation_departed) + self._observe_pebble_ready() + + def _observe_pebble_ready(self): + for container in self._containers.keys(): + snake_case_container_name = container.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, event: WorkloadEvent): + """Event handler for `pebble_ready`.""" + if self.model.relations[self._relation_name]: + self._setup_promtail(event.workload) + + def _on_relation_created(self, _: RelationCreatedEvent) -> None: + """Event handler for `relation_created`.""" + for container in self._containers.values(): + if container.can_connect(): + self._setup_promtail(container) + + def 
_on_relation_changed(self, event: RelationEvent) -> None: + """Event handler for `relation_changed`. + + Args: + event: The event object `RelationChangedEvent`. + """ + self._handle_alert_rules(event.relation) + + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + for container in self._containers.values(): + if not container.can_connect(): + continue + if self.model.relations[self._relation_name]: + if "promtail" not in container.get_plan().services: + self._setup_promtail(container) + continue + + new_config = self._promtail_config(container.name) + if new_config != self._current_config(container): + container.push( + WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True + ) + + # Loki may send endpoints late. Don't necessarily start, there may be + # no clients + if new_config["clients"]: + container.restart(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _on_relation_departed(self, _: RelationEvent) -> None: + """Event handler for `relation_departed`. + + Args: + event: The event object `RelationDepartedEvent`. 
+ """ + for container in self._containers.values(): + if not container.can_connect(): + continue + if not self._charm.model.relations[self._relation_name]: + container.stop(WORKLOAD_SERVICE_NAME) + continue + + new_config = self._promtail_config(container.name) + if new_config != self._current_config(container): + container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True) + + if new_config["clients"]: + container.restart(WORKLOAD_SERVICE_NAME) + else: + container.stop(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_departed.emit() + + def _add_pebble_layer(self, workload_binary_path: str, container: Container) -> None: + """Adds Pebble layer that manages Promtail service in Workload container. + + Args: + workload_binary_path: string providing path to promtail binary in workload container. + container: container into which the layer is to be added. + """ + pebble_layer = Layer( + { + "summary": "promtail layer", + "description": "pebble config layer for promtail", + "services": { + WORKLOAD_SERVICE_NAME: { + "override": "replace", + "summary": WORKLOAD_SERVICE_NAME, + "command": f"{workload_binary_path} {self._cli_args}", + "startup": "disabled", + } + }, + } + ) + container.add_layer(container.name, pebble_layer, combine=True) + + def _create_directories(self, container: Container) -> None: + """Creates the directories for Promtail binary and config file.""" + container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True) + container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True) + + def _obtain_promtail(self, promtail_info: dict, container: Container) -> None: + """Obtain promtail binary from an attached resource or download it. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. 
The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + container: container into which promtail is to be obtained. + """ + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + if self._promtail_attached_as_resource: + self._push_promtail_if_attached(container, workload_binary_path) + return + + if self._promtail_must_be_downloaded(promtail_info): + self._download_and_push_promtail_to_workload(container, promtail_info) + else: + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(container, binary_path, workload_binary_path) + + def _push_binary_to_workload( + self, container: Container, binary_path: str, workload_binary_path: str + ) -> None: + """Push promtail binary into workload container. + + Args: + binary_path: path in charm container from which promtail binary is read. + workload_binary_path: path in workload container to which promtail binary is pushed. + container: container into which promtail is to be uploaded. + """ + with open(binary_path, "rb") as f: + container.push(workload_binary_path, f, permissions=0o755, make_dirs=True) + logger.debug("The promtail binary file has been pushed to the workload container.") + + @property + def _promtail_attached_as_resource(self) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Returns: + a boolean representing whether Promtail binary is attached as a resource or not. + """ + try: + self._charm.model.resources.fetch(self._promtail_resource_name) + return True + except ModelError: + return False + except NameError as e: + if "invalid resource name" in str(e): + return False + raise + + def _push_promtail_if_attached(self, container: Container, workload_binary_path: str) -> bool: + """Checks whether Promtail binary is attached to the charm or not. 
+ + Args: + workload_binary_path: string specifying expected path of promtail + in workload container + container: container into which promtail is to be pushed. + + Returns: + a boolean representing whether Promtail binary is attached or not. + """ + logger.info("Promtail binary file has been obtained from an attached resource.") + resource_path = self._charm.model.resources.fetch(self._promtail_resource_name) + self._push_binary_to_workload(container, resource_path, workload_binary_path) + return True + + def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool: + """Checks whether promtail binary must be downloaded or not. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + + Returns: + a boolean representing whether Promtail binary must be downloaded or not. + """ + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + if not self._is_promtail_binary_in_charm(binary_path): + return True + + if not self._sha256sums_matches(binary_path, promtail_info["binsha"]): + return True + + logger.debug("Promtail binary file is already in the the charm container.") + return False + + def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool: + """Checks whether a file's sha256sum matches or not with a specific sha256sum. + + Args: + file_path: A string representing the files' patch. + sha256sum: The sha256sum against which we want to verify. + + Returns: + a boolean representing whether a file's sha256sum matches or not with + a specific sha256sum. 
+ """ + try: + with open(file_path, "rb") as f: + file_bytes = f.read() + result = sha256(file_bytes).hexdigest() + + if result != sha256sum: + msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format( + sha256sum, result + ) + logger.debug(msg) + return False + + return True + except (APIError, FileNotFoundError): + msg = "File: '{}' could not be opened".format(file_path) + logger.error(msg) + return False + + def _is_promtail_binary_in_charm(self, binary_path: str) -> bool: + """Check if Promtail binary is already stored in charm container. + + Args: + binary_path: string path of promtail binary to check + + Returns: + a boolean representing whether Promtail is present or not. + """ + return True if Path(binary_path).is_file() else False + + def _download_and_push_promtail_to_workload( + self, container: Container, promtail_info: dict + ) -> None: + """Downloads a Promtail zip file and pushes the binary to the workload. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + container: container into which promtail is to be uploaded. 
+ """ + # Check for Juju proxy variables and fall back to standard ones if not set + # If no Juju proxy variable was set, we set proxies to None to let the ProxyHandler get + # the proxy env variables from the environment + proxies = { + # The ProxyHandler uses only the protocol names as keys + # https://docs.python.org/3/library/urllib.request.html#urllib.request.ProxyHandler + "https": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""), + "http": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""), + # The ProxyHandler uses `no` for the no_proxy key + # https://github.com/python/cpython/blob/3.12/Lib/urllib/request.py#L2553 + "no": os.environ.get("JUJU_CHARM_NO_PROXY", ""), + } + proxies = {k: v for k, v in proxies.items() if v != ""} or None + + proxy_handler = request.ProxyHandler(proxies) + opener = request.build_opener(proxy_handler) + + with opener.open(promtail_info["url"]) as r: + file_bytes = r.read() + file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz") + with open(file_path, "wb") as f: + f.write(file_bytes) + logger.info( + "Promtail binary zip file has been downloaded and stored in: %s", + file_path, + ) + + decompressed_file = GzipFile(fileobj=BytesIO(file_bytes)) + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + with open(binary_path, "wb") as outfile: + outfile.write(decompressed_file.read()) + logger.debug("Promtail binary file has been downloaded.") + + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(container, binary_path, workload_binary_path) + + @property + def _cli_args(self) -> str: + """Return the cli arguments to pass to promtail. + + Returns: + The arguments as a string + """ + return "-config.file={}".format(WORKLOAD_CONFIG_PATH) + + def _current_config(self, container) -> dict: + """Property that returns the current Promtail configuration. + + Returns: + A dict containing Promtail configuration. 
+ """ + if not container.can_connect(): + logger.debug("Could not connect to promtail container!") + return {} + try: + raw_current = container.pull(WORKLOAD_CONFIG_PATH).read() + return yaml.safe_load(raw_current) + except (ProtocolError, PathError) as e: + logger.warning( + "Could not check the current promtail configuration due to " + "a failure in retrieving the file: %s", + e, + ) + return {} + + def _promtail_config(self, container_name: str) -> dict: + """Generates the config file for Promtail. + + Reference: https://grafana.com/docs/loki/latest/send-data/promtail/configuration + """ + config = {"clients": self._clients_list()} + if self.insecure_skip_verify: + for client in config["clients"]: + client["tls_config"] = {"insecure_skip_verify": True} + + config.update(self._server_config(container_name)) + config.update(self._positions) + config.update(self._scrape_configs(container_name)) + return config + + def _clients_list(self) -> list: + """Generates a list of clients for use in the promtail config. + + Returns: + A list of endpoints + """ + return self.loki_endpoints + + def _server_config(self, container_name: str) -> dict: + """Generates the server section of the Promtail config file. + + Returns: + A dict representing the `server` section. + """ + return { + "server": { + "http_listen_port": self._promtails_ports[container_name]["http_listen_port"], + "grpc_listen_port": self._promtails_ports[container_name]["grpc_listen_port"], + } + } + + @property + def _positions(self) -> dict: + """Generates the positions section of the Promtail config file. + + Returns: + A dict representing the `positions` section. + """ + return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}} + + def _scrape_configs(self, container_name: str) -> dict: + """Generates the scrape_configs section of the Promtail config file. + + Returns: + A dict representing the `scrape_configs` section. 
+ """ + job_name = f"juju_{self.topology.identifier}" + + # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it + common_labels = { + f"juju_{k}": v + for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items() + } + common_labels["container"] = container_name + scrape_configs = [] + + # Files config + labels = common_labels.copy() + labels.update( + { + "job": job_name, + "__path__": "", + } + ) + config = {"targets": ["localhost"], "labels": labels} + scrape_config = { + "job_name": "system", + "static_configs": self._generate_static_configs(config, container_name), + } + scrape_configs.append(scrape_config) + + # Syslog config + syslog_port = self._logs_scheme.get(container_name, {}).get("syslog-port") + if syslog_port: + relabel_mappings = [ + "severity", + "facility", + "hostname", + "app_name", + "proc_id", + "msg_id", + ] + syslog_labels = common_labels.copy() + syslog_labels.update({"job": f"{job_name}_syslog"}) + syslog_config = { + "job_name": "syslog", + "syslog": { + "listen_address": f"127.0.0.1:{syslog_port}", + "label_structured_data": True, + "labels": syslog_labels, + }, + "relabel_configs": [ + {"source_labels": [f"__syslog_message_{val}"], "target_label": val} + for val in relabel_mappings + ] + + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}], + } + scrape_configs.append(syslog_config) # type: ignore + + return {"scrape_configs": scrape_configs} + + def _generate_static_configs(self, config: dict, container_name: str) -> list: + """Generates static_configs section. 
+ + Returns: + - a list of dictionaries representing static_configs section + """ + static_configs = [] + + for _file in self._logs_scheme.get(container_name, {}).get("log-files", []): + conf = deepcopy(config) + conf["labels"]["__path__"] = _file + static_configs.append(conf) + + return static_configs + + def _setup_promtail(self, container: Container) -> None: + # Use the first + relations = self._charm.model.relations[self._relation_name] + if len(relations) > 1: + logger.debug( + "Multiple log_proxy relations. Getting Promtail from application {}".format( + relations[0].app.name + ) + ) + relation = relations[0] + promtail_binaries = json.loads( + relation.data[relation.app].get("promtail_binary_zip_url", "{}") + ) + if not promtail_binaries: + return + + self._create_directories(container) + self._ensure_promtail_binary(promtail_binaries, container) + + container.push( + WORKLOAD_CONFIG_PATH, + yaml.safe_dump(self._promtail_config(container.name)), + make_dirs=True, + ) + + workload_binary_path = os.path.join( + WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"] + ) + self._add_pebble_layer(workload_binary_path, container) + + if self._current_config(container).get("clients"): + try: + container.restart(WORKLOAD_SERVICE_NAME) + except ChangeError as e: + self.on.promtail_digest_error.emit(str(e)) + else: + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _ensure_promtail_binary(self, promtail_binaries: dict, container: Container): + if self._is_promtail_installed(promtail_binaries[self._arch], container): + return + + try: + self._obtain_promtail(promtail_binaries[self._arch], container) + except URLError as e: + msg = f"Promtail binary couldn't be downloaded - {str(e)}" + logger.warning(msg) + self.on.promtail_digest_error.emit(msg) + + def _is_promtail_installed(self, promtail_info: dict, container: Container) -> bool: + """Determine if promtail has already 
been installed to the container. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must at least contain a key + "filename" giving the name of promtail binary + container: container in which to check whether promtail is installed. + """ + workload_binary_path = f"{WORKLOAD_BINARY_DIR}/{promtail_info['filename']}" + try: + container.list_files(workload_binary_path) + except (APIError, FileNotFoundError): + return False + return True + + def _generate_promtails_ports(self, logs_scheme) -> dict: + return { + container: { + "http_listen_port": HTTP_LISTEN_PORT_START + 2 * i, + "grpc_listen_port": GRPC_LISTEN_PORT_START + 2 * i, + } + for i, container in enumerate(logs_scheme.keys()) + } + + def syslog_port(self, container_name: str) -> str: + """Gets the port on which promtail is listening for syslog in this container. + + Returns: + A str representing the port + """ + return str(self._logs_scheme.get(container_name, {}).get("syslog-port")) + + def rsyslog_config(self, container_name: str) -> str: + """Generates a config line for use with rsyslog. + + Returns: + The rsyslog config line as a string + """ + return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format( + self._logs_scheme.get(container_name, {}).get("syslog-port") + ) + + @property + def _containers(self) -> Dict[str, Container]: + return {cont: self._charm.unit.get_container(cont) for cont in self._logs_scheme.keys()} + + +class _PebbleLogClient: + @staticmethod + def check_juju_version() -> bool: + """Make sure the Juju version supports Log Forwarding.""" + juju_version = JujuVersion.from_environ() + if not juju_version > JujuVersion(version=str("3.3")): + msg = f"Juju version {juju_version} does not support Pebble log forwarding. Juju >= 3.4 is needed." 
+ logger.warning(msg) + return False + return True + + @staticmethod + def _build_log_target( + unit_name: str, loki_endpoint: str, topology: JujuTopology, enable: bool + ) -> Dict: + """Build a log target for the log forwarding Pebble layer. + + Log target's syntax for enabling/disabling forwarding is explained here: + https://github.com/canonical/pebble?tab=readme-ov-file#log-forwarding + """ + services_value = ["all"] if enable else ["-all"] + + log_target = { + "override": "replace", + "services": services_value, + "type": "loki", + "location": loki_endpoint, + } + if enable: + log_target.update( + { + "labels": { + "product": "Juju", + "charm": topology._charm_name, + "juju_model": topology._model, + "juju_model_uuid": topology._model_uuid, + "juju_application": topology._application, + "juju_unit": topology._unit, + }, + } + ) + + return {unit_name: log_target} + + @staticmethod + def _build_log_targets( + loki_endpoints: Optional[Dict[str, str]], topology: JujuTopology, enable: bool + ): + """Build all the targets for the log forwarding Pebble layer.""" + targets = {} + if not loki_endpoints: + return targets + + for unit_name, endpoint in loki_endpoints.items(): + targets.update( + _PebbleLogClient._build_log_target( + unit_name=unit_name, + loki_endpoint=endpoint, + topology=topology, + enable=enable, + ) + ) + return targets + + @staticmethod + def disable_inactive_endpoints( + container: Container, active_endpoints: Dict[str, str], topology: JujuTopology + ): + """Disable forwarding for inactive endpoints by checking against the Pebble plan.""" + pebble_layer = container.get_plan().to_dict().get("log-targets", None) + if not pebble_layer: + return + + for unit_name, target in pebble_layer.items(): + # If the layer is a disabled log forwarding endpoint, skip it + if "-all" in target["services"]: # pyright: ignore + continue + + if unit_name not in active_endpoints: + layer = Layer( + { # pyright: ignore + "log-targets": 
_PebbleLogClient._build_log_targets( + loki_endpoints={unit_name: "(removed)"}, + topology=topology, + enable=False, + ) + } + ) + container.add_layer(f"{container.name}-log-forwarding", layer=layer, combine=True) + + @staticmethod + def enable_endpoints( + container: Container, active_endpoints: Dict[str, str], topology: JujuTopology + ): + """Enable forwarding for the specified Loki endpoints.""" + layer = Layer( + { # pyright: ignore + "log-targets": _PebbleLogClient._build_log_targets( + loki_endpoints=active_endpoints, + topology=topology, + enable=True, + ) + } + ) + container.add_layer(f"{container.name}-log-forwarding", layer, combine=True) + + +class LogForwarder(ConsumerBase): + """Forward the standard outputs of all workloads operated by a charm to one or multiple Loki endpoints. + + This class implements Pebble log forwarding. Juju >= 3.4 is needed. + """ + + def __init__( + self, + charm: CharmBase, + *, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None, + forward_alert_rules: bool = True, + ): + _PebbleLogClient.check_juju_version() + super().__init__( + charm, + relation_name, + alert_rules_path, + recursive, + skip_alert_topology_labeling, + forward_alert_rules=forward_alert_rules, + ) + self._charm = charm + self._relation_name = relation_name + + on = self._charm.on[self._relation_name] + self.framework.observe(on.relation_joined, self._update_logging) + self.framework.observe(on.relation_changed, self._update_logging) + self.framework.observe(on.relation_departed, self._update_logging) + self.framework.observe(on.relation_broken, self._update_logging) + + if refresh_event: + if not isinstance(refresh_event, list): + refresh_event = [refresh_event] + for ev in refresh_event: + self.framework.observe(ev, self._update_logging) + + for container_name in 
self._charm.meta.containers.keys(): + snake_case_container_name = container_name.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, event: PebbleReadyEvent): + if not (loki_endpoints := self._retrieve_endpoints_from_relation()): + logger.warning("No Loki endpoints available") + return + + self._update_endpoints(event.workload, loki_endpoints) + + def _update_logging(self, event: RelationEvent): + """Update the log forwarding to match the active Loki endpoints.""" + if not (loki_endpoints := self._retrieve_endpoints_from_relation()): + logger.warning("No Loki endpoints available") + return + + for container in self._charm.unit.containers.values(): + if container.can_connect(): + self._update_endpoints(container, loki_endpoints) + # else: `_update_endpoints` will be called on pebble-ready anyway. + + self._handle_alert_rules(event.relation) + + def _retrieve_endpoints_from_relation(self) -> dict: + loki_endpoints = {} + + # Get the endpoints from relation data + for relation in self._charm.model.relations[self._relation_name]: + loki_endpoints.update(self._fetch_endpoints(relation)) + + return loki_endpoints + + def _update_endpoints(self, container: Container, loki_endpoints: dict): + _PebbleLogClient.disable_inactive_endpoints( + container=container, + active_endpoints=loki_endpoints, + topology=self.topology, + ) + _PebbleLogClient.enable_endpoints( + container=container, active_endpoints=loki_endpoints, topology=self.topology + ) + + def is_ready(self, relation: Optional[Relation] = None): + """Check if the relation is active and healthy.""" + if not relation: + relations = self._charm.model.relations[self._relation_name] + if not relations: + return False + return all(self.is_ready(relation) for relation in relations) + + try: + if self._extract_urls(relation): + return True + return False + except (KeyError, json.JSONDecodeError): + 
return False + + def _extract_urls(self, relation: Relation) -> Dict[str, str]: + """Default getter function to extract Loki endpoints from a relation. + + Returns: + A dictionary of remote units and the respective Loki endpoint. + { + "loki/0": "http://loki:3100/loki/api/v1/push", + "another-loki/0": "http://another-loki:3100/loki/api/v1/push", + } + """ + endpoints: Dict = {} + + for unit in relation.units: + endpoint = relation.data[unit]["endpoint"] + deserialized_endpoint = json.loads(endpoint) + url = deserialized_endpoint["url"] + endpoints[unit.name] = url + + return endpoints + + def _fetch_endpoints(self, relation: Relation) -> Dict[str, str]: + """Fetch Loki Push API endpoints from relation data using the endpoints getter.""" + endpoints: Dict = {} + + if not self.is_ready(relation): + logger.warning(f"The relation '{relation.name}' is not ready yet.") + return endpoints + + # if the code gets here, the function won't raise anymore because it's + # also called in is_ready() + endpoints = self._extract_urls(relation) + + return endpoints + + +def charm_logging_config( + endpoint_requirer: LokiPushApiConsumer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[List[str]], Optional[str]]: + """Utility function to determine the charm_logging config you will likely want. + + If no endpoint is provided: + disable charm logging. + If https endpoint is provided but cert_path is not found on disk: + disable charm logging. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm logging (with or without tls, as appropriate) + + Args: + endpoint_requirer: an instance of LokiPushApiConsumer. + cert_path: a path where a cert is stored. + + Returns: + A tuple with (optionally) the values of the endpoints and the certificate path. + + Raises: + LokiPushApiError: if some endpoint are http and others https. 
+ """ + endpoints = [ep["url"] for ep in endpoint_requirer.loki_endpoints] + if not endpoints: + return None, None + + https = tuple(endpoint.startswith("https://") for endpoint in endpoints) + + if all(https): # all endpoints are https + if cert_path is None: + raise LokiPushApiError("Cannot send logs to https endpoints without a certificate.") + if not Path(cert_path).exists(): + # if endpoints is https BUT we don't have a server_cert yet: + # disable charm logging until we do to prevent tls errors + return None, None + return endpoints, str(cert_path) + + if all(not x for x in https): # all endpoints are http + return endpoints, None + + # if there's a disagreement, that's very weird: + raise LokiPushApiError("Some endpoints are http, some others are https. That's not good.") diff --git a/charms/garm-operator/lib/charms/observability_libs/v0/juju_topology.py b/charms/garm-operator/lib/charms/observability_libs/v0/juju_topology.py new file mode 100644 index 00000000..55969d64 --- /dev/null +++ b/charms/garm-operator/lib/charms/observability_libs/v0/juju_topology.py @@ -0,0 +1,311 @@ +# Copyright 2022 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to use the `JujuTopology` class to +create and consume topology information from Juju in a consistent manner. + +The goal of the Juju topology is to uniquely identify a piece +of software running across any of your Juju-managed deployments. +This is achieved by combining the following four elements: + +- Model name +- Model UUID +- Application name +- Unit identifier + + +For a more in-depth description of the concept, as well as a +walk-through of it's use-case in observability, see +[this blog post](https://juju.is/blog/model-driven-observability-part-2-juju-topology-metrics) +on the Juju blog. + +## Library Usage + +This library may be used to create and consume `JujuTopology` objects. 
+The `JujuTopology` class provides three ways to create instances: + +### Using the `from_charm` method + +Enables instantiation by supplying the charm as an argument. When +creating topology objects for the current charm, this is the recommended +approach. + +```python +topology = JujuTopology.from_charm(self) +``` + +### Using the `from_dict` method + +Allows for instantion using a dictionary of relation data, like the +`scrape_metadata` from Prometheus or the labels of an alert rule. When +creating topology objects for remote charms, this is the recommended +approach. + +```python +scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) +topology = JujuTopology.from_dict(scrape_metadata) +``` + +### Using the class constructor + +Enables instantiation using whatever values you want. While this +is useful in some very specific cases, this is almost certainly not +what you are looking for as setting these values manually may +result in observability metrics which do not uniquely identify a +charm in order to provide accurate usage reporting, alerting, +horizontal scaling, or other use cases. + +```python +topology = JujuTopology( + model="some-juju-model", + model_uuid="00000000-0000-0000-0000-000000000001", + application="fancy-juju-application", + unit="fancy-juju-application/0", + charm_name="fancy-juju-application-k8s", +) +``` + +""" + +import warnings +from collections import OrderedDict +from typing import Dict, List, Optional +from uuid import UUID + +# The unique Charmhub library identifier, never change it +LIBID = "bced1658f20f49d28b88f61f83c2d232" + +LIBAPI = 0 +LIBPATCH = 7 + + +class InvalidUUIDError(Exception): + """Invalid UUID was provided.""" + + def __init__(self, uuid: str): + self.message = "'{}' is not a valid UUID.".format(uuid) + super().__init__(self.message) + + +class JujuTopology: + """JujuTopology is used for storing, generating and formatting juju topology information. 
+ + DEPRECATED: This class is deprecated. Use `pip install cosl` and + `from cosl.juju_topology import JujuTopology` instead. + """ + + def __init__( + self, + model: str, + model_uuid: str, + application: str, + unit: Optional[str] = None, + charm_name: Optional[str] = None, + ): + """Build a JujuTopology object. + + A `JujuTopology` object is used for storing and transforming + Juju topology information. This information is used to + annotate Prometheus scrape jobs and alert rules. Such + annotation when applied to scrape jobs helps in identifying + the source of the scrapped metrics. On the other hand when + applied to alert rules topology information ensures that + evaluation of alert expressions is restricted to the source + (charm) from which the alert rules were obtained. + + Args: + model: a string name of the Juju model + model_uuid: a globally unique string identifier for the Juju model + application: an application name as a string + unit: a unit name as a string + charm_name: name of charm as a string + """ + warnings.warn( + """ + observability_libs.v0.juju_topology is deprecated. Please import the + library from `cosl` instead: https://github.com/canonical/cos-lib + """, + DeprecationWarning, + ) + if not self.is_valid_uuid(model_uuid): + raise InvalidUUIDError(model_uuid) + + self._model = model + self._model_uuid = model_uuid + self._application = application + self._charm_name = charm_name + self._unit = unit + + def is_valid_uuid(self, uuid): + """Validate the supplied UUID against the Juju Model UUID pattern. + + Args: + uuid: string that needs to be checked if it is valid v4 UUID. + + Returns: + True if parameter is a valid v4 UUID, False otherwise. + """ + try: + return str(UUID(uuid, version=4)) == uuid + except (ValueError, TypeError): + return False + + @classmethod + def from_charm(cls, charm): + """Creates a JujuTopology instance by using the model data available on a charm object. 
+ + Args: + charm: a `CharmBase` object for which the `JujuTopology` will be constructed + Returns: + a `JujuTopology` object. + """ + return cls( + model=charm.model.name, + model_uuid=charm.model.uuid, + application=charm.model.app.name, + unit=charm.model.unit.name, + charm_name=charm.meta.name, + ) + + @classmethod + def from_dict(cls, data: dict): + """Factory method for creating `JujuTopology` children from a dictionary. + + Args: + data: a dictionary with five keys providing topology information. The keys are + - "model" + - "model_uuid" + - "application" + - "unit" + - "charm_name" + `unit` and `charm_name` may be empty, but will result in more limited + labels. However, this allows us to support charms without workloads. + + Returns: + a `JujuTopology` object. + """ + return cls( + model=data["model"], + model_uuid=data["model_uuid"], + application=data["application"], + unit=data.get("unit", ""), + charm_name=data.get("charm_name", ""), + ) + + def as_dict( + self, + *, + remapped_keys: Optional[Dict[str, str]] = None, + excluded_keys: Optional[List[str]] = None, + ) -> OrderedDict: + """Format the topology information into an ordered dict. + + Keeping the dictionary ordered is important to be able to + compare dicts without having to resort to deep comparisons. + + Args: + remapped_keys: A dictionary mapping old key names to new key names, + which will be substituted when invoked. + excluded_keys: A list of key names to exclude from the returned dict. + uuid_length: The length to crop the UUID to. 
+ """ + ret = OrderedDict( + [ + ("model", self.model), + ("model_uuid", self.model_uuid), + ("application", self.application), + ("unit", self.unit), + ("charm_name", self.charm_name), + ] + ) + if excluded_keys: + ret = OrderedDict({k: v for k, v in ret.items() if k not in excluded_keys}) + + if remapped_keys: + ret = OrderedDict( + (remapped_keys.get(k), v) if remapped_keys.get(k) else (k, v) + for k, v in ret.items() # type: ignore + ) + + return ret + + @property + def identifier(self) -> str: + """Format the topology information into a terse string. + + This crops the model UUID, making it unsuitable for comparisons against + anything but other identifiers. Mainly to be used as a display name or file + name where long strings might become an issue. + + >>> JujuTopology( \ + model = "a-model", \ + model_uuid = "00000000-0000-4000-8000-000000000000", \ + application = "some-app", \ + unit = "some-app/1" \ + ).identifier + 'a-model_00000000_some-app' + """ + parts = self.as_dict( + excluded_keys=["unit", "charm_name"], + ) + + parts["model_uuid"] = self.model_uuid_short + values = parts.values() + + return "_".join([str(val) for val in values]).replace("/", "_") + + @property + def label_matcher_dict(self) -> Dict[str, str]: + """Format the topology information into a dict with keys having 'juju_' as prefix. + + Relabelled topology never includes the unit as it would then only match + the leader unit (ie. the unit that produced the dict). + """ + items = self.as_dict( + remapped_keys={"charm_name": "charm"}, + excluded_keys=["unit"], + ).items() + + return {"juju_{}".format(key): value for key, value in items if value} + + @property + def label_matchers(self) -> str: + """Format the topology information into a promql/logql label matcher string. + + Topology label matchers should never include the unit as it + would then only match the leader unit (ie. the unit that + produced the matchers). 
+ """ + items = self.label_matcher_dict.items() + return ", ".join(['{}="{}"'.format(key, value) for key, value in items if value]) + + @property + def model(self) -> str: + """Getter for the juju model value.""" + return self._model + + @property + def model_uuid(self) -> str: + """Getter for the juju model uuid value.""" + return self._model_uuid + + @property + def model_uuid_short(self) -> str: + """Getter for the juju model value, truncated to the first eight letters.""" + return self._model_uuid[:8] + + @property + def application(self) -> str: + """Getter for the juju application value.""" + return self._application + + @property + def charm_name(self) -> Optional[str]: + """Getter for the juju charm name value.""" + return self._charm_name + + @property + def unit(self) -> Optional[str]: + """Getter for the juju unit value.""" + return self._unit diff --git a/charms/garm-operator/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/charms/garm-operator/lib/charms/prometheus_k8s/v0/prometheus_scrape.py new file mode 100644 index 00000000..ff52245c --- /dev/null +++ b/charms/garm-operator/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -0,0 +1,1946 @@ +# Copyright 2021 Canonical Ltd. +# See LICENSE file for licensing details. +"""Prometheus Scrape Library. + +## Overview + +This document explains how to integrate with the Prometheus charm +for the purpose of providing a metrics endpoint to Prometheus. It +also explains how alternative implementations of the Prometheus charms +may maintain the same interface and be backward compatible with all +currently integrated charms. Finally this document is the +authoritative reference on the structure of relation data that is +shared between Prometheus charms and any other charm that intends to +provide a scrape target for Prometheus. 
+ +## Source code + +Source code can be found on GitHub at: + https://github.com/canonical/prometheus-k8s-operator/tree/main/lib/charms/prometheus_k8s + +## Provider Library Usage + +This Prometheus charm interacts with its scrape targets using its +charm library. Charms seeking to expose metric endpoints for the +Prometheus charm, must do so using the `MetricsEndpointProvider` +object from this charm library. For the simplest use cases, using the +`MetricsEndpointProvider` object only requires instantiating it, +typically in the constructor of your charm (the one which exposes a +metrics endpoint). The `MetricsEndpointProvider` constructor requires +the name of the relation over which a scrape target (metrics endpoint) +is exposed to the Prometheus charm. This relation must use the +`prometheus_scrape` interface. By default address of the metrics +endpoint is set to the unit IP address, by each unit of the +`MetricsEndpointProvider` charm. These units set their address in +response to the `PebbleReady` event of each container in the unit, +since container restarts of Kubernetes charms can result in change of +IP addresses. The default name for the metrics endpoint relation is +`metrics-endpoint`. It is strongly recommended to use the same +relation name for consistency across charms and doing so obviates the +need for an additional constructor argument. The +`MetricsEndpointProvider` object may be instantiated as follows + + from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + ... + self.metrics_endpoint = MetricsEndpointProvider(self) + ... + +Note that the first argument (`self`) to `MetricsEndpointProvider` is +always a reference to the parent (scrape target) charm. + +An instantiated `MetricsEndpointProvider` object will ensure that each +unit of its parent charm, is a scrape target for the +`MetricsEndpointConsumer` (Prometheus) charm. 
By default +`MetricsEndpointProvider` assumes each unit of the consumer charm +exports its metrics at a path given by `/metrics` on port 80. These +defaults may be changed by providing the `MetricsEndpointProvider` +constructor an optional argument (`jobs`) that represents a +Prometheus scrape job specification using Python standard data +structures. This job specification is a subset of Prometheus' own +[scrape +configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) +format but represented using Python data structures. More than one job +may be provided using the `jobs` argument. Hence `jobs` accepts a list +of dictionaries where each dictionary represents one `scrape_config` +object as described in the Prometheus documentation. The currently +supported configuration subset is: `job_name`, `metrics_path`, +`static_configs` + +Suppose it is required to change the port on which scraped metrics are +exposed to 8000. This may be done by providing the following data +structure as the value of `jobs`. + +``` +[ + { + "static_configs": [ + { + "targets": ["*:8000"] + } + ] + } +] +``` + +The wildcard ("*") host specification implies that the scrape targets +will automatically be set to the host addresses advertised by each +unit of the consumer charm. + +It is also possible to change the metrics path and scrape multiple +ports, for example + +``` +[ + { + "metrics_path": "/my-metrics-path", + "static_configs": [ + { + "targets": ["*:8000", "*:8081"], + } + ] + } +] +``` + +More complex scrape configurations are possible. For example + +``` +[ + { + "static_configs": [ + { + "targets": ["10.1.32.215:7000", "*:8000"], + "labels": { + "some_key": "some-value" + } + } + ] + } +] +``` + +This example scrapes the target "10.1.32.215" at port 7000 in addition +to scraping each unit at port 8000. There is however one difference +between wildcard targets (specified using "*") and fully qualified +targets (such as "10.1.32.215").
The Prometheus charm automatically +associates labels with metrics generated by each target. These labels +localise the source of metrics within the Juju topology by specifying +its "model name", "model UUID", "application name" and "unit +name". However unit name is associated only with wildcard targets but +not with fully qualified targets. + +Multiple jobs with different metrics paths and labels are allowed, but +each job must be given a unique name: + +``` +[ + { + "job_name": "my-first-job", + "metrics_path": "one-path", + "static_configs": [ + { + "targets": ["*:7000"], + "labels": { + "some_key": "some-value" + } + } + ] + }, + { + "job_name": "my-second-job", + "metrics_path": "another-path", + "static_configs": [ + { + "targets": ["*:8000"], + "labels": { + "some_other_key": "some-other-value" + } + } + ] + } +] +``` + +**Important:** `job_name` should be a fixed string (e.g. hardcoded literal). +For instance, if you include variable elements, like your `unit.name`, it may break +the continuity of the metrics time series gathered by Prometheus when the leader unit +changes (e.g. on upgrade or rescale). + +Additionally, it is also technically possible, but **strongly discouraged**, to +configure the following scrape-related settings, which behave as described by the +[Prometheus documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config): + +- `static_configs` +- `scrape_interval` +- `scrape_timeout` +- `proxy_url` +- `relabel_configs` +- `metric_relabel_configs` +- `sample_limit` +- `label_limit` +- `label_name_length_limit` +- `label_value_length_limit` + +The settings above are supported by the `prometheus_scrape` library only for the sake of +specialized facilities like the [Prometheus Scrape Config](https://charmhub.io/prometheus-scrape-config-k8s) +charm. Virtually no charms should use these settings, and charmers definitely **should not** +expose them to the Juju administrator via configuration options. 
+ +## Consumer Library Usage + +The `MetricsEndpointConsumer` object may be used by Prometheus +charms to manage relations with their scrape targets. For this +purpose, a Prometheus charm needs to do two things + +1. Instantiate the `MetricsEndpointConsumer` object by providing it a +reference to the parent (Prometheus) charm and optionally the name of +the relation that the Prometheus charm uses to interact with scrape +targets. This relation must conform to the `prometheus_scrape` +interface and it is strongly recommended that this relation be named +`metrics-endpoint` which is its default value. + +For example a Prometheus charm may instantiate the +`MetricsEndpointConsumer` in its constructor as follows + + from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointConsumer + + def __init__(self, *args): + super().__init__(*args) + ... + self.metrics_consumer = MetricsEndpointConsumer(self) + ... + +2. A Prometheus charm also needs to respond to the +`TargetsChangedEvent` event of the `MetricsEndpointConsumer` by adding itself as +an observer for these events, as in + + self.framework.observe( + self.metrics_consumer.on.targets_changed, + self._on_scrape_targets_changed, + ) + +In responding to the `TargetsChangedEvent` event the Prometheus +charm must update the Prometheus configuration so that any new scrape +targets are added and/or old ones removed from the list of scraped +endpoints. For this purpose the `MetricsEndpointConsumer` object +exposes a `jobs()` method that returns a list of scrape jobs. Each +element of this list is the Prometheus scrape configuration for that +job. In order to update the Prometheus configuration, the Prometheus +charm needs to replace the current list of jobs with the list provided +by `jobs()` as follows + + def _on_scrape_targets_changed(self, event): + ... + scrape_jobs = self.metrics_consumer.jobs() + for job in scrape_jobs: + prometheus_scrape_config.append(job) + ...
+ +## Alerting Rules + +This charm library also supports gathering alerting rules from all +related `MetricsEndpointProvider` charms and enabling corresponding alerts within the +Prometheus charm. Alert rules are automatically gathered by `MetricsEndpointProvider` +charms when using this library, from a directory conventionally named +`prometheus_alert_rules`. This directory must reside at the top level +in the `src` folder of the consumer charm. Each file in this directory +is assumed to be in one of two formats: +- the official prometheus alert rule format, conforming to the +[Prometheus docs](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) +- a single rule format, which is a simplified subset of the official format, +comprising a single alert rule per file, using the same YAML fields. + +The file name must have one of the following extensions: +- `.rule` +- `.rules` +- `.yml` +- `.yaml` + +An example of the contents of such a file in the custom single rule +format is shown below. + +``` +alert: HighRequestLatency +expr: job:request_latency_seconds:mean5m{my_key=my_value} > 0.5 +for: 10m +labels: + severity: Medium + type: HighLatency +annotations: + summary: High request latency for {{ $labels.instance }}. +``` + +The `MetricsEndpointProvider` will read all available alert rules and +also inject "filtering labels" into the alert expressions. The +filtering labels ensure that alert rules are localised to the metrics +provider charm's Juju topology (application, model and its UUID). Such +a topology filter is essential to ensure that alert rules submitted by +one provider charm generates alerts only for that same charm. When +alert rules are embedded in a charm, and the charm is deployed as a +Juju application, the alert rules from that application have their +expressions automatically updated to filter for metrics coming from +the units of that application alone. 
This removes the risk of spurious +evaluation, e.g., when you have multiple deployments of the same charm +monitored by the same Prometheus. + +Not all alerts one may want to specify can be embedded in a +charm. Some alert rules will be specific to a user's use case. This is +the case, for example, of alert rules that are based on business +constraints, like expecting a certain number of requests to a specific +API every five minutes. Such alert rules can be specified via the +[COS Config Charm](https://charmhub.io/cos-configuration-k8s), +which allows importing alert rules and other settings like dashboards +from a Git repository. + +Gathering alert rules and generating rule files within the Prometheus +charm is easily done using the `alerts` property of +`MetricsEndpointConsumer`. Alerts generated by Prometheus will +automatically include Juju topology labels in the alerts. These labels +indicate the source of the alert. The following labels are +automatically included with each alert + +- `juju_model` +- `juju_model_uuid` +- `juju_application` + +## Relation Data + +The Prometheus charm uses both application and unit relation data to +obtain information regarding its scrape jobs, alert rules and scrape +targets. This relation data is in JSON format and it closely resembles +the YAML structure of Prometheus +[scrape configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config). + +Units of Metrics provider charms advertise their names and addresses +over unit relation data using the `prometheus_scrape_unit_name` and +`prometheus_scrape_unit_address` keys. The `scrape_metadata`, +`scrape_jobs` and `alert_rules` keys in application relation data +of Metrics provider charms hold eponymous information.
+ +""" # noqa: W505 + +import copy +import hashlib +import ipaddress +import json +import logging +import os +import platform +import re +import socket +import subprocess +import tempfile +from collections import defaultdict +from pathlib import Path +from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union +from urllib.parse import urlparse + +import yaml +from cosl import JujuTopology +from cosl.rules import AlertRules, generic_alert_groups +from ops.charm import CharmBase, RelationRole +from ops.framework import ( + BoundEvent, + EventBase, + EventSource, + Object, + ObjectEvents, + StoredDict, + StoredList, +) +from ops.model import Relation + +# The unique Charmhub library identifier, never change it +LIBID = "bc84295fef5f4049878f07b131968ee2" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 58 + +# Version 0.0.53 needed for cosl.rules.generic_alert_groups +PYDEPS = ["cosl>=0.0.53"] + +logger = logging.getLogger(__name__) + + +ALLOWED_KEYS = { + "job_name", + "metrics_path", + "static_configs", + "scrape_interval", + "scrape_timeout", + "proxy_url", + "relabel_configs", + "metric_relabel_configs", + "sample_limit", + "label_limit", + "label_name_length_limit", + "label_value_length_limit", + "scheme", + "basic_auth", + "tls_config", + "authorization", + "params", +} +DEFAULT_JOB = { + "metrics_path": "/metrics", + "static_configs": [{"targets": ["*:80"]}], +} + + +DEFAULT_RELATION_NAME = "metrics-endpoint" +RELATION_INTERFACE_NAME = "prometheus_scrape" + +DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/prometheus_alert_rules" + +FallbackScrapeProtocol = Literal[ + "PrometheusProto", + "OpenMetricsText0.0.1", + "OpenMetricsText1.0.0", + "PrometheusText0.0.4", + "PrometheusText1.0.0", +] + + +class PrometheusConfig: + """A namespace for utility functions for manipulating 
the prometheus config dict.""" + + # relabel instance labels so that instance identifiers are globally unique + # stable over unit recreation + topology_relabel_config = { + "source_labels": ["juju_model", "juju_model_uuid", "juju_application"], + "separator": "_", + "target_label": "instance", + "regex": "(.*)", + } + + topology_relabel_config_wildcard = { + "source_labels": ["juju_model", "juju_model_uuid", "juju_application", "juju_unit"], + "separator": "_", + "target_label": "instance", + "regex": "(.*)", + } + + @staticmethod + def sanitize_scrape_config(job: dict) -> dict: + """Restrict permissible scrape configuration options. + + If job is empty then a default job is returned. The + default job is + + ``` + { + "metrics_path": "/metrics", + "static_configs": [{"targets": ["*:80"]}], + } + ``` + + Args: + job: a dict containing a single Prometheus job + specification. + + Returns: + a dictionary containing a sanitized job specification. + """ + sanitized_job = DEFAULT_JOB.copy() + sanitized_job.update({key: value for key, value in job.items() if key in ALLOWED_KEYS}) + return sanitized_job + + @staticmethod + def sanitize_scrape_configs(scrape_configs: List[dict]) -> List[dict]: + """A vectorized version of `sanitize_scrape_config`.""" + return [PrometheusConfig.sanitize_scrape_config(job) for job in scrape_configs] + + @staticmethod + def prefix_job_names(scrape_configs: List[dict], prefix: str) -> List[dict]: + """Adds the given prefix to all the job names in the given scrape_configs list.""" + modified_scrape_configs = [] + for scrape_config in scrape_configs: + job_name = scrape_config.get("job_name") + modified = scrape_config.copy() + modified["job_name"] = prefix + "_" + job_name if job_name else prefix + modified_scrape_configs.append(modified) + + return modified_scrape_configs + + @staticmethod + def _build_host_to_unit( + hosts: Dict[str, Tuple[str, str, str]], + topology: Optional[JujuTopology], + ) -> Dict[str, str]: + """Build a reverse lookup 
dict: {address: unit_name, fqdn: unit_name, ...}. + + Maps each known unit identifier (IP address and/or FQDN) to its unit name, + so that non-wildcard targets can be matched whether specified as IP or FQDN. + + Returns an empty dict when ``topology`` is None, since matching only serves + the purpose of injecting ``juju_unit`` labels. + + The set subtraction ``{addr, fqdn} - {""}`` drops empty strings (absent FQDN, + e.g. when external_url is set) and deduplicates when addr == fqdn (non-IP + bind address). + """ + if not topology: + return {} + return { + identifier: unit_name + for unit_name, (addr, _, fqdn) in hosts.items() + for identifier in {addr, fqdn} - {""} + } + + @staticmethod + def _classify_targets(targets: List[str]) -> Tuple[List[str], List[str]]: + """Split a list of targets into wildcard and non-wildcard targets. + + Returns: + A ``(wildcard_targets, non_wildcard_targets)`` tuple. + """ + wildcard_targets = [] + non_wildcard_targets = [] + wildcard_re = re.compile(r"\*(?:(:\d+))?") + for target in targets: + if wildcard_re.match(target): + wildcard_targets.append(target) + else: + non_wildcard_targets.append(target) + return wildcard_targets, non_wildcard_targets + + @staticmethod + def _match_non_wildcard_targets( + targets: List[str], + host_to_unit: Dict[str, str], + ) -> Tuple[Dict[str, List[str]], List[str]]: + """Match non-wildcard targets against known unit addresses. + + Parses the host portion of each target (handling IPv6 bracket notation) and + looks it up in ``host_to_unit``. + + Returns: + A ``(matched_by_unit, unmatched_targets)`` tuple where ``matched_by_unit`` + maps each matched unit name to the list of targets belonging to it, and + ``unmatched_targets`` contains targets with no unit match. + """ + matched_by_unit: Dict[str, List[str]] = {} + unmatched_targets: List[str] = [] + for target in targets: + # urlparse correctly handles IPv6 (e.g. [::1]:9093), host:port, and + # bare hostnames — unlike a naive split(":")[0]. 
+ parsed = urlparse(f"//{target}") + target_host = parsed.hostname or target.split(":", 1)[0] + matched_unit = host_to_unit.get(target_host) + if matched_unit: + matched_by_unit.setdefault(matched_unit, []).append(target) + else: + unmatched_targets.append(target) + return matched_by_unit, unmatched_targets + + @staticmethod + def _build_per_unit_job( + job: dict, + static_config: dict, + targets: List[str], + unit_name: str, + unit_path: str, + topology: Optional[JujuTopology], + ) -> dict: + """Build a single per-unit scrape job with topology labels and relabeling rules. + + Used for both wildcard and matched non-wildcard targets to avoid duplication. + + Args: + job: the original scrape job dict to base the new job on. + static_config: the original static_config dict to copy labels from. + targets: the resolved target addresses for this unit. + unit_name: the Juju unit name (e.g. "alertmanager/0"). + unit_path: path prefix to prepend to the metrics path (from external URL, may be ""). + topology: optional topology for adding Juju labels. + + Returns: + A new scrape job dict for this unit. + """ + unit_num = unit_name.split("/")[-1] + new_static = static_config.copy() + new_static["targets"] = targets + new_job = job.copy() + new_job["job_name"] = new_job.get("job_name", "unnamed-job") + "-" + unit_num + new_job["metrics_path"] = unit_path + (new_job.get("metrics_path") or "/metrics") + if topology: + new_static["labels"] = { + **topology.label_matcher_dict, + "juju_unit": unit_name, + **new_static.get("labels", {}), + } + # Instance relabeling for topology should be last in order. 
+ new_job["relabel_configs"] = new_job.get("relabel_configs", []) + [ + PrometheusConfig.topology_relabel_config_wildcard + ] + new_job["static_configs"] = [new_static] + return new_job + + @staticmethod + def expand_wildcard_targets_into_individual_jobs( + scrape_jobs: List[dict], + hosts: Dict[str, Tuple[str, str, str]], + topology: Optional[JujuTopology] = None, + ) -> List[dict]: + """Extract wildcard hosts from the given scrape_configs list into separate jobs. + + For wildcard targets (e.g. "*:9093"), one job per unit is created. When + ``topology`` is provided, the ``juju_unit`` label is injected into each + per-unit job; without ``topology`` the per-unit jobs are created but no + topology labels are added. + + For non-wildcard targets (fully qualified hostnames/IPs), the host portion of + each target is matched against the known unit addresses in ``hosts``. Targets + whose address matches a known unit are expanded into a per-unit job (with + ``juju_unit`` when ``topology`` is provided), mirroring the wildcard behaviour. + Targets with no match (e.g. external services) are kept in a single job without + ``juju_unit``, preserving the previous behaviour. + + Args: + scrape_jobs: list of scrape jobs. + hosts: a dictionary mapping unit names to ``(address, path, fqdn)`` tuples for + all units of the relation for which this job configuration must be + constructed. + topology: optional arg for adding topology labels to scrape targets. + When ``None``, wildcard targets are still expanded into per-unit jobs but + no ``juju_unit`` or topology labels are added. Non-wildcard target matching + is skipped entirely (all non-wildcard targets are kept in a single job), + since matching only serves the purpose of injecting ``juju_unit`` labels. + """ + # Build a reverse lookup: {address: unit_name, fqdn: unit_name, ...} + # so that non-wildcard targets can be matched whether specified as IP or FQDN. 
+ # The set subtraction {addr, fqdn} - {""} drops empty strings (absent FQDN) + # and deduplicates when addr == fqdn (non-IP bind address). + host_to_unit = PrometheusConfig._build_host_to_unit(hosts, topology) + + modified_scrape_jobs = [] + for job in scrape_jobs: + static_configs = job.get("static_configs") + if not static_configs: + continue + + # Accumulates non-wildcard targets that could not be matched to any known unit. + # These are kept in a single job with topology-only labels (no juju_unit): + # fully-qualified targets that predate this feature are unaffected. + unmatched_static_configs = [] + + for static_config in static_configs: + targets = static_config.get("targets") + if not targets: + continue + + wildcard_targets, non_wildcard_targets = PrometheusConfig._classify_targets( + targets + ) + + # Non-wildcard targets: try to match each target's host against known unit + # addresses. Matched targets get a per-unit job with juju_unit; unmatched + # targets get topology-only labels with no per-unit expansion. + if non_wildcard_targets: + matched_by_unit, unmatched_targets = ( + PrometheusConfig._match_non_wildcard_targets( + non_wildcard_targets, host_to_unit + ) + ) + + # Unmatched targets: no unit mapping found — kept with topology-only + # labels and no per-unit expansion (juju_unit is not added). + if unmatched_targets: + unmatched_static_config = static_config.copy() + unmatched_static_config["targets"] = unmatched_targets + if topology: + unmatched_static_config["labels"] = { + **topology.label_matcher_dict, + **unmatched_static_config.get("labels", {}), + } + unmatched_static_configs.append(unmatched_static_config) + + # Matched targets: one per-unit job with juju_unit label. 
+ for unit_name, unit_targets_list in matched_by_unit.items(): + _, unit_path, _ = hosts.get(unit_name, ("", "", "")) + modified_scrape_jobs.append( + PrometheusConfig._build_per_unit_job( + job, static_config, unit_targets_list, unit_name, unit_path, topology + ) + ) + + # Wildcard targets: one per-unit job per host, replacing "*" with the unit address. + if wildcard_targets: + for unit_name, (unit_hostname, unit_path, _unit_fqdn) in hosts.items(): + resolved_targets = [ + target.replace("*", unit_hostname) for target in wildcard_targets + ] + modified_scrape_jobs.append( + PrometheusConfig._build_per_unit_job( + job, static_config, resolved_targets, unit_name, unit_path, topology + ) + ) + + if unmatched_static_configs: + modified_job = job.copy() + modified_job["static_configs"] = unmatched_static_configs + modified_job["metrics_path"] = modified_job.get("metrics_path") or "/metrics" + + if topology: + # Instance relabeling for topology should be last in order. + modified_job["relabel_configs"] = modified_job.get("relabel_configs", []) + [ + PrometheusConfig.topology_relabel_config + ] + + modified_scrape_jobs.append(modified_job) + + return modified_scrape_jobs + + @staticmethod + def render_alertmanager_static_configs(alertmanagers: List[str]): + """Render the alertmanager static_configs section from a list of URLs. + + Each target must be in the hostname:port format, and prefixes are specified in a separate + key. Therefore, with ingress in place, would need to extract the path into the + `path_prefix` key, which is higher up in the config hierarchy. + + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config + + Args: + alertmanagers: List of alertmanager URLs. + + Returns: + A dict representation for the static_configs section. + """ + # Make sure it's a valid url so urlparse could parse it. 
+ scheme = re.compile(r"^https?://") + sanitized = [am if scheme.search(am) else "http://" + am for am in alertmanagers] + + # Create a mapping from paths to netlocs + # Group alertmanager targets into a dictionary of lists: + # {path: [netloc1, netloc2]} + paths = defaultdict(list) # type: Dict[Tuple[str, str], List[str]] + for parsed in map(urlparse, sanitized): + path = parsed.path or "/" + paths[(parsed.scheme, path)].append(parsed.netloc) + + return { + "alertmanagers": [ + { + # For https we still do not render a `tls_config` section because + # certs are expected to be made available by the charm via the + # `update-ca-certificates` mechanism. + "scheme": scheme, + "path_prefix": path_prefix, + "static_configs": [{"targets": netlocs}], + } + for (scheme, path_prefix), netlocs in paths.items() + ] + } + + +class RelationNotFoundError(Exception): + """Raised if there is no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different role.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: 
RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. + """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class InvalidScrapeJobEvent(EventBase): + """Event emitted when alert rule files are not valid.""" + + def __init__(self, handle, errors: str = ""): + super().__init__(handle) + self.errors = errors + + def snapshot(self) -> Dict: + """Save error information.""" + return {"errors": self.errors} + + def restore(self, snapshot): + """Restore error information.""" + self.errors = snapshot["errors"] + + +class MetricsEndpointProviderEvents(ObjectEvents): + """Events raised by :class:`InvalidAlertRuleEvent`s.""" + + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + invalid_scrape_job = EventSource(InvalidScrapeJobEvent) + + +def _type_convert_stored(obj): + """Convert Stored* to their appropriate types, recursively.""" + if isinstance(obj, StoredList): + return list(map(_type_convert_stored, obj)) + if isinstance(obj, StoredDict): + rdict = {} # type: Dict[Any, Any] + for k in obj.keys(): + rdict[k] = _type_convert_stored(obj[k]) + return rdict + return obj + + +def _validate_relation_by_interface_and_direction( + charm: 
CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Verifies that a relation has the necessary characteristics. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. 
+ """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface or "None" + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class InvalidAlertRulePathError(Exception): + """Raised if the alert rules folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + alert_rules_absolute_path: Path, + message: str, + ): + self.alert_rules_absolute_path = alert_rules_absolute_path + self.message = message + + super().__init__(self.message) + + +class TargetsChangedEvent(EventBase): + """Event emitted when Prometheus scrape targets change.""" + + def __init__(self, handle, relation_id): + super().__init__(handle) + self.relation_id = relation_id + + def snapshot(self): + """Save scrape target relation information.""" + return {"relation_id": self.relation_id} + + def restore(self, snapshot): + """Restore scrape target relation information.""" + self.relation_id = snapshot["relation_id"] + + +class MonitoringEvents(ObjectEvents): + """Event descriptor for events raised by `MetricsEndpointConsumer`.""" + + targets_changed = EventSource(TargetsChangedEvent) + + +class MetricsEndpointConsumer(Object): + """A Prometheus based Monitoring service.""" + + on = MonitoringEvents() # pyright: ignore + + 
def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + fallback_scrape_protocol: Optional[FallbackScrapeProtocol] = None, + ): + """A Prometheus based Monitoring service. + + Args: + charm: a `CharmBase` instance that manages this + instance of the Prometheus service. + relation_name: an optional string name of the relation between `charm` + and the Prometheus charmed service. The default is "metrics-endpoint". + It is strongly advised not to change the default, so that people + deploying your charm will have a consistent experience with all + other charms that consume metrics endpoints. + fallback_scrape_protocol: an optional fallback protocol to use when the + Content-Type header of a scrape response is missing or invalid. Supported + values: "PrometheusProto", "OpenMetricsText0.0.1", "OpenMetricsText1.0.0", + "PrometheusText0.0.4", "PrometheusText1.0.0". Ref: + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config. + This had to be added after we bumped to Prometheus workload major version 3. Starting in major 3, + Prometheus no longer defaults to the Prometheus text format (PrometheusText0.0.4) + when the Content-Type header is missing or invalid, and instead fails the scrape with an error. + This parameter should only be used by MetricsEndpointConsumers that use Prometheus 3 and above, as setting + this key in the scrape configs of Prometheus 2 will result in the error: + "field fallback_scrape_protocol not found in type config.ScrapeConfig". + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `prometheus_scrape` relation + interface. 
+ RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._fallback_scrape_protocol = fallback_scrape_protocol + self._tool = CosTool(self._charm) + events = self._charm.on[relation_name] + self.framework.observe(events.relation_changed, self._on_metrics_provider_relation_changed) + self.framework.observe( + events.relation_departed, self._on_metrics_provider_relation_departed + ) + + def _on_metrics_provider_relation_changed(self, event): + """Handle changes with related metrics providers. + + Anytime there are changes in relations between Prometheus + and metrics provider charms the Prometheus charm is informed, + through a `TargetsChangedEvent` event. The Prometheus charm can + then choose to update its scrape configuration. + + Args: + event: a `CharmEvent` in response to which the Prometheus + charm must update its scrape configuration. + """ + rel_id = event.relation.id + + self.on.targets_changed.emit(relation_id=rel_id) + + def _on_metrics_provider_relation_departed(self, event): + """Update job config when a metrics provider departs. + + When a metrics provider departs the Prometheus charm is informed + through a `TargetsChangedEvent` event so that it can update its + scrape configuration to ensure that the departed metrics provider + is removed from the list of scrape jobs and + + Args: + event: a `CharmEvent` that indicates a metrics provider + unit has departed. + """ + rel_id = event.relation.id + self.on.targets_changed.emit(relation_id=rel_id) + + def jobs(self) -> list: + """Fetch the list of scrape jobs. 
+ + Returns: + A list consisting of all the static scrape configurations + for each related `MetricsEndpointProvider` that has specified + its scrape targets. + """ + scrape_jobs = [] + + for relation in self._charm.model.relations[self._relation_name]: + static_scrape_jobs = self._static_scrape_config(relation) + if static_scrape_jobs: + # Duplicate job names will cause validate_scrape_jobs to fail. + # Therefore we need to dedupe here and after all jobs are collected. + static_scrape_jobs = _dedupe_job_names(static_scrape_jobs) + try: + self._tool.validate_scrape_jobs(static_scrape_jobs) + except subprocess.CalledProcessError as e: + if self._charm.unit.is_leader(): + data = json.loads(relation.data[self._charm.app].get("event", "{}")) + data["scrape_job_errors"] = str(e) + relation.data[self._charm.app]["event"] = json.dumps(data) + else: + scrape_jobs.extend(static_scrape_jobs) + + scrape_jobs = _dedupe_job_names(scrape_jobs) + + return scrape_jobs + + @property + def alerts(self) -> dict: + """Fetch alerts for all relations. + + A Prometheus alert rules file consists of a list of "groups". Each + group consists of a list of alerts (`rules`) that are sequentially + executed. This method returns all the alert rules provided by each + related metrics provider charm. These rules may be used to generate a + separate alert rules file for each relation since the returned list + of alert groups are indexed by that relations Juju topology identifier. + The Juju topology identifier string includes substrings that identify + alert rule related metadata such as the Juju model, model UUID and the + application name from where the alert rule originates. Since this + topology identifier is globally unique, it may be used for instance as + the name for the file into which the list of alert rule groups are + written. 
For each relation, the structure of data returned is a dictionary + representation of a standard prometheus rules file: + + {"groups": [{"name": ...}, ...]} + + per official prometheus documentation + https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ + + The value of the `groups` key is such that it may be used to generate + a Prometheus alert rules file directly using `yaml.dump` but the + `groups` key itself must be included as this is required by Prometheus. + + For example the list of alert rule groups returned by this method may + be written into files consumed by Prometheus as follows + + ``` + for topology_identifier, alert_rule_groups in self.metrics_consumer.alerts().items(): + filename = "juju_" + topology_identifier + ".rules" + path = os.path.join(PROMETHEUS_RULES_DIR, filename) + rules = yaml.safe_dump(alert_rule_groups) + container.push(path, rules, make_dirs=True) + ``` + + Returns: + A dictionary mapping the Juju topology identifier of the source charm to + its list of alert rule groups. + """ + alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units or not relation.app: + continue + + alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) + if not alert_rules: + continue + + alert_rules = self._inject_alert_expr_labels(alert_rules) + + identifier, topology = self._get_identifier_by_alert_rules(alert_rules) + if not topology: + try: + scrape_metadata = json.loads(relation.data[relation.app]["scrape_metadata"]) + identifier = JujuTopology.from_dict(scrape_metadata).identifier + + except KeyError as e: + logger.debug( + "Relation %s has no 'scrape_metadata': %s", + relation.id, + e, + ) + + if not identifier: + logger.error( + "Alert rules were found but no usable group or identifier was present." + ) + continue + + # We need to append the relation info to the identifier. 
This is to allow for cases for there are two + # relations which eventually scrape the same application. Issue #551. + identifier = f"{identifier}_{relation.name}_{relation.id}" + + alerts[identifier] = alert_rules + + _, errmsg = self._tool.validate_alert_rules(alert_rules) + if errmsg: + if alerts[identifier]: + del alerts[identifier] + if self._charm.unit.is_leader(): + data = json.loads(relation.data[self._charm.app].get("event", "{}")) + data["errors"] = errmsg + relation.data[self._charm.app]["event"] = json.dumps(data) + continue + + return alerts + + def _get_identifier_by_alert_rules( + self, rules: dict + ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: + """Determine an appropriate dict key for alert rules. + + The key is used as the filename when writing alerts to disk, so the structure + and uniqueness is important. + + Args: + rules: a dict of alert rules + Returns: + A tuple containing an identifier, if found, and a JujuTopology, if it could + be constructed. + """ + if "groups" not in rules: + logger.debug("No alert groups were found in relation data") + return None, None + + # Construct an ID based on what's in the alert rules if they have labels + for group in rules["groups"]: + try: + labels = group["rules"][0]["labels"] + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + return topology.identifier, topology + except KeyError: + logger.debug("Alert rules were found but no usable labels were present") + continue + + logger.warning( + "No labeled alert rules were found, and no 'scrape_metadata' " + "was available. Using the alert group name as filename." 
+ ) + try: + for group in rules["groups"]: + return group["name"], None + except KeyError: + logger.debug("No group name was found to use as identifier") + + return None, None + + def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: + """Iterate through alert rules and inject topology into expressions. + + Args: + rules: a dict of alert rules + """ + if "groups" not in rules: + return rules + + modified_groups = [] + for group in rules["groups"]: + # Copy off rules, so we don't modify an object we're iterating over + rules_copy = group["rules"] + for idx, rule in enumerate(rules_copy): + labels = rule.get("labels") + + if labels: + try: + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + + # Inject topology and put it back in the list + rule["expr"] = self._tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", rule["expr"]), + topology.alert_expression_dict, + ) + except KeyError: + # Some required JujuTopology key is missing. Just move on. + pass + + group["rules"][idx] = rule + + modified_groups.append(group) + + rules["groups"] = modified_groups + return rules + + def _static_scrape_config(self, relation) -> list: + """Generate the static scrape configuration for a single relation. + + If the relation data includes `scrape_metadata` then the value + of this key is used to annotate the scrape jobs with Juju + Topology labels before returning them. + + Args: + relation: an `ops.model.Relation` object whose static + scrape configuration is required. + + Returns: + A list (possibly empty) of scrape jobs. Each job is a + valid Prometheus scrape configuration for that job, + represented as a Python dictionary. 
+ """ + if not relation.units: + return [] + + scrape_configs = json.loads(relation.data[relation.app].get("scrape_jobs", "[]")) + + if not scrape_configs: + return [] + + scrape_metadata = json.loads(relation.data[relation.app].get("scrape_metadata", "{}")) + + if not scrape_metadata: + return scrape_configs + + topology = JujuTopology.from_dict(scrape_metadata) + + job_name_prefix = "juju_{}_prometheus_scrape".format(topology.identifier) + scrape_configs = PrometheusConfig.prefix_job_names(scrape_configs, job_name_prefix) + scrape_configs = PrometheusConfig.sanitize_scrape_configs(scrape_configs) + + hosts = self._relation_hosts(relation) + + scrape_configs = PrometheusConfig.expand_wildcard_targets_into_individual_jobs( + scrape_configs, hosts, topology + ) + + # For https scrape targets we still do not render a `tls_config` section because certs + # are expected to be made available by the charm via the `update-ca-certificates` mechanism. + + if self._fallback_scrape_protocol: + for job in scrape_configs: + job["fallback_scrape_protocol"] = self._fallback_scrape_protocol + + return scrape_configs + + def _relation_hosts(self, relation: Relation) -> Dict[str, Tuple[str, str, str]]: + """Returns a mapping from unit names to (address, path, fqdn) tuples. + + Args: + relation: the relation to read unit data from. + + Returns: + A dict mapping each unit name to a ``(address, path, fqdn)`` tuple. The + ``fqdn`` element may be an empty string when the FQDN is not known. When + present, it may either be distinct from, or equal to ``address``. For + example, when the unit address itself is already a hostname. 
def _dedupe_job_names(jobs: List[dict]) -> List[dict]:
    """Deduplicate a list of dicts by appending a hash to the value of the 'job_name' key.

    Jobs that share a name with at least one other job are renamed to
    "<name>_<sha256 of the job's JSON>". Jobs that are completely identical
    collapse into a single entry. The input list is never modified.

    The result is ordered by first appearance of each original job name, with the
    jobs of one name kept in their original relative order.

    Args:
        jobs: A list of prometheus scrape jobs

    Returns:
        A new list of (deep-copied) scrape jobs with unique contents.
    """

    def _digest(job: dict) -> str:
        # The exact serialization is part of the resulting job name, so it must stay
        # `json.dumps(job)` with default options to keep names stable across versions.
        return hashlib.sha256(json.dumps(job).encode()).hexdigest()

    # Group the copies by their original name in one pass; dicts preserve
    # insertion order, so groups appear in order of first occurrence.
    grouped = {}
    for job in copy.deepcopy(jobs):
        grouped.setdefault(job["job_name"], []).append(job)

    deduped_jobs = []
    seen = set()
    for same_name_jobs in grouped.values():
        needs_suffix = len(same_name_jobs) > 1
        for job in same_name_jobs:
            if needs_suffix:
                # The hash is computed before the rename, i.e. over the original job.
                job["job_name"] = "{}_{}".format(job["job_name"], _digest(job))

            # Identical jobs end up with identical names and therefore fingerprints.
            fingerprint = _digest(job)
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            deduped_jobs.append(job)

    return deduped_jobs
+ """ + charm_dir = Path(str(charm.charm_dir)) + if not charm_dir.exists() or not charm_dir.is_dir(): + # Operator Framework does not currently expose a robust + # way to determine the top level charm source directory + # that is consistent across deployed charms and unit tests + # Hence for unit tests the current working directory is used + # TODO: updated this logic when the following ticket is resolved + # https://github.com/canonical/operator/issues/643 + charm_dir = Path(os.getcwd()) + + alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) + + if not alerts_dir_path.exists(): + raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") + if not alerts_dir_path.is_dir(): + raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") + + return str(alerts_dir_path) + + +class MetricsEndpointProvider(Object): + """A metrics endpoint for Prometheus.""" + + on = MetricsEndpointProviderEvents() # pyright: ignore + + def __init__( + self, + charm, + relation_name: str = DEFAULT_RELATION_NAME, + jobs=None, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + refresh_event: Optional[Union[BoundEvent, List[BoundEvent]]] = None, + external_url: str = "", + lookaside_jobs_callable: Optional[Callable] = None, + *, + forward_alert_rules: bool = True, + ): + """Construct a metrics provider for a Prometheus charm. + + If your charm exposes a Prometheus metrics endpoint, the + `MetricsEndpointProvider` object enables your charm to easily + communicate how to reach that metrics endpoint. + + By default, a charm instantiating this object has the metrics + endpoints of each of its units scraped by the related Prometheus + charms. The scraped metrics are automatically tagged by the + Prometheus charms with Juju topology data via the + `juju_model_name`, `juju_model_uuid`, `juju_application_name` + and `juju_unit` labels. 
To support such tagging `MetricsEndpointProvider` + automatically forwards scrape metadata to a `MetricsEndpointConsumer` + (Prometheus charm). + + Scrape targets provided by `MetricsEndpointProvider` can be + customized when instantiating this object. For example in the + case of a charm exposing the metrics endpoint for each of its + units on port 8080 and the `/metrics` path, the + `MetricsEndpointProvider` can be instantiated as follows: + + self.metrics_endpoint_provider = MetricsEndpointProvider( + self, + jobs=[{ + "static_configs": [{"targets": ["*:8080"]}], + }]) + + The notation `*:` means "scrape each unit of this charm on port + ``. + + In case the metrics endpoints are not on the standard `/metrics` path, + a custom path can be specified as follows: + + self.metrics_endpoint_provider = MetricsEndpointProvider( + self, + jobs=[{ + "metrics_path": "/my/strange/metrics/path", + "static_configs": [{"targets": ["*:8080"]}], + }]) + + Note how the `jobs` argument is a list: this allows you to expose multiple + combinations of paths "metrics_path" and "static_configs" in case your charm + exposes multiple endpoints, which could happen, for example, when you have + multiple workload containers, with applications in each needing to be scraped. + The structure of the objects in the `jobs` list is one-to-one with the + `scrape_config` configuration item of Prometheus' own configuration (see + https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config + ), but with only a subset of the fields allowed. The permitted fields are + listed in `ALLOWED_KEYS` object in this charm library module. + + It is also possible to specify alert rules. By default, this library will look + into the `/prometheus_alert_rules`, which in a standard charm + layouts resolves to `src/prometheus_alert_rules`. Each alert rule goes into a + separate `*.rule` file. 
If the syntax of a rule is invalid, + the `MetricsEndpointProvider` logs an error and does not load the particular + rule. + + To avoid false positives and negatives in the evaluation of alert rules, + all ingested alert rule expressions are automatically qualified using Juju + Topology filters. This ensures that alert rules provided by your charm, trigger + alerts based only on data scrapped from your charm. For example an alert rule + such as the following + + alert: UnitUnavailable + expr: up < 1 + for: 0m + + will be automatically transformed into something along the lines of the following + + alert: UnitUnavailable + expr: up{juju_model=, juju_model_uuid=, juju_application=} < 1 + for: 0m + + An attempt will be made to validate alert rules prior to loading them into Prometheus. + If they are invalid, an event will be emitted from this object which charms can respond + to in order to set a meaningful status for administrators. + + This can be observed via `consumer.on.alert_rule_status_changed` which contains: + - The error(s) encountered when validating as `errors` + - A `valid` attribute, which can be used to reset the state of charms if alert rules + are updated via another mechanism (e.g. `cos-config`) and refreshed. + + Args: + charm: a `CharmBase` object that manages this + `MetricsEndpointProvider` object. Typically, this is + `self` in the instantiating class. + relation_name: an optional string name of the relation between `charm` + and the Prometheus charmed service. The default is "metrics-endpoint". + It is strongly advised not to change the default, so that people + deploying your charm will have a consistent experience with all + other charms that provide metrics endpoints. + jobs: an optional list of dictionaries where each + dictionary represents the Prometheus scrape + configuration for a single job. 
When not provided, a + default scrape configuration is provided for the + `/metrics` endpoint polling all units of the charm on port `80` + using the `MetricsEndpointProvider` object. + alert_rules_path: an optional path for the location of alert rules + files. Defaults to "./prometheus_alert_rules", + resolved relative to the directory hosting the charm entry file. + The alert rules are automatically updated on charm upgrade. + forward_alert_rules: a boolean flag to toggle forwarding of charmed alert rules. + refresh_event: an optional bound event or list of bound events which + will be observed to re-set scrape job data (IP address and others) + external_url: an optional argument that represents an external url that + can be generated by an Ingress or a Proxy. + lookaside_jobs_callable: an optional `Callable` which should be invoked + when the job configuration is built as a secondary mapping. The callable + should return a `List[Dict]` which is syntactically identical to the + `jobs` parameter, but can be updated out of step initialization of + this library without disrupting the 'global' job spec. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `prometheus_scrape` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. 
+ """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + try: + alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Prometheus alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + + super().__init__(charm, relation_name) + self.topology = JujuTopology.from_charm(charm) + + self._charm = charm + self._alert_rules_path = alert_rules_path + self._forward_alert_rules = forward_alert_rules + self._relation_name = relation_name + # sanitize job configurations to the supported subset of parameters + jobs = [] if jobs is None else jobs + self._jobs = PrometheusConfig.sanitize_scrape_configs(jobs) + + if external_url: + external_url = ( + external_url if urlparse(external_url).scheme else ("http://" + external_url) + ) + self.external_url = external_url + self._lookaside_jobs = lookaside_jobs_callable + + events = self._charm.on[self._relation_name] + self.framework.observe(events.relation_changed, self._on_relation_changed) + + if not refresh_event: + # FIXME remove once podspec charms are verified. + # `self.set_scrape_job_spec()` is called every re-init so this should not be needed. + if len(self._charm.meta.containers) == 1: + if "kubernetes" in self._charm.meta.series: + # This is a podspec charm + refresh_event = [self._charm.on.update_status] + else: + # This is a sidecar/pebble charm + container = list(self._charm.meta.containers.values())[0] + refresh_event = [self._charm.on[container.name.replace("-", "_")].pebble_ready] + else: + logger.warning( + "%d containers are present in metadata.yaml and " + "refresh_event was not specified. Defaulting to update_status. 
" + "Metrics IP may not be set in a timely fashion.", + len(self._charm.meta.containers), + ) + refresh_event = [self._charm.on.update_status] + + else: + if not isinstance(refresh_event, list): + refresh_event = [refresh_event] + + self.framework.observe(events.relation_joined, self.set_scrape_job_spec) + for ev in refresh_event: + self.framework.observe(ev, self.set_scrape_job_spec) + + def _on_relation_changed(self, event): + """Check for alert rule messages in the relation data before moving on.""" + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + scrape_errors = ev.get("scrape_job_errors", None) + if scrape_errors: + self.on.invalid_scrape_job.emit(errors=scrape_errors) + + def update_scrape_job_spec(self, jobs): + """Update scrape job specification.""" + self._jobs = PrometheusConfig.sanitize_scrape_configs(jobs) + self.set_scrape_job_spec() + + def set_scrape_job_spec(self, _=None): + """Ensure scrape target information is made available to prometheus. + + When a metrics provider charm is related to a prometheus charm, the + metrics provider sets specification and metadata related to its own + scrape configuration. This information is set using Juju application + data. In addition, each of the consumer units also sets its own + host address in Juju unit relation data. 
+ """ + self._set_unit_ip() + + if not self._charm.unit.is_leader(): + return + + alert_rules = AlertRules(query_type="promql", topology=self.topology) + if self._forward_alert_rules: + alert_rules.add_path(self._alert_rules_path, recursive=True) + alert_rules.add( + copy.deepcopy(generic_alert_groups.application_rules), + group_name_prefix=self.topology.identifier, + ) + alert_rules_as_dict = alert_rules.as_dict() + + for relation in self._charm.model.relations[self._relation_name]: + relation.data[self._charm.app]["scrape_metadata"] = json.dumps(self._scrape_metadata) + relation.data[self._charm.app]["scrape_jobs"] = json.dumps(self._scrape_jobs) + + # Update relation data with the string representation of the rule file. + # Juju topology is already included in the "scrape_metadata" field above. + # The consumer side of the relation uses this information to name the rules file + # that is written to the filesystem. + relation.data[self._charm.app]["alert_rules"] = json.dumps(alert_rules_as_dict) + + def _set_unit_ip(self, _=None): + """Set unit host address. + + Each time a metrics provider charm container is restarted it updates its own + host address in the unit relation data for the prometheus charm. + + The only argument specified is an event, and it ignored. This is for expediency + to be able to use this method as an event handler, although no access to the + event is actually needed. + """ + for relation in self._charm.model.relations[self._relation_name]: + unit_ip = str(self._charm.model.get_binding(relation).network.bind_address) + + # TODO store entire url in relation data, instead of only select url parts. 
@property
def _scrape_jobs(self) -> list:
    """Fetch list of scrape jobs.

    The statically configured jobs are combined with whatever the optional
    lookaside callable returns (after sanitizing). When the combined list is
    empty, the default job is used.

    Returns:
        A new list of dictionaries, where each dictionary specifies a
        single scrape job for Prometheus.
    """
    # Copy before extending: `self._jobs or []` hands back `self._jobs` itself when it
    # is non-empty, and extending that in place would append the lookaside jobs to
    # the stored job list again on every access of this property.
    jobs = list(self._jobs or [])
    if callable(self._lookaside_jobs):
        jobs.extend(PrometheusConfig.sanitize_scrape_configs(self._lookaside_jobs()))
    return jobs or [DEFAULT_JOB]
All rule files are + forwarded using the same 'prometheus_scrape' interface that is also used by + `MetricsEndpointProvider`. + + Args: + charm: A charm instance that `provides` a relation with the `prometheus_scrape` interface. + relation_name: Name of the relation in `metadata.yaml` that + has the `prometheus_scrape` interface. + dir_path: Root directory for the collection of rule files. + recursive: Whether to scan for rule files recursively. + """ + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + dir_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive=True, + ): + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._recursive = recursive + + try: + dir_path = _resolve_dir_against_charm_path(charm, dir_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Prometheus alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + self.dir_path = dir_path + + events = self._charm.on[self._relation_name] + event_sources = [ + events.relation_joined, + events.relation_changed, + self._charm.on.leader_elected, + self._charm.on.upgrade_charm, + ] + + for event_source in event_sources: + self.framework.observe(event_source, self._update_relation_data) + + def _reinitialize_alert_rules(self): + """Reloads alert rules and updates all relations.""" + self._update_relation_data(None) + + def _update_relation_data(self, _): + """Update application relation data with alert rules for all relations.""" + if not self._charm.unit.is_leader(): + return + + alert_rules = AlertRules(query_type="promql") + alert_rules.add_path(self.dir_path, recursive=self._recursive) + alert_rules_as_dict = alert_rules.as_dict() + + logger.info("Updating relation data with rule files from disk") + for relation in self._charm.model.relations[self._relation_name]: + relation.data[self._charm.app]["alert_rules"] = json.dumps( + alert_rules_as_dict, + 
class CosTool:
    """Uses cos-tool to inject label matchers into alert rule expressions and validate rules."""

    # Class-level defaults; instances shadow them once the lookup has run.
    _path = None
    _disabled = False

    # Topology labels that are turned into label matchers, in the order in which
    # they are handed to cos-tool.
    _TOPOLOGY_LABELS = (
        "juju_model",
        "juju_model_uuid",
        "juju_application",
        "juju_charm",
        "juju_unit",
    )

    def __init__(self, charm):
        self._charm = charm

    @property
    def path(self):
        """Lazy lookup of the path of cos-tool.

        Returns:
            The resolved path of the binary, or None when it could not be found.
            A failed lookup is remembered, so it is only attempted once.
        """
        if self._disabled:
            return None
        if not self._path:
            self._path = self._get_tool_path()
            if not self._path:
                logger.debug("Skipping injection of juju topology as label matchers")
                self._disabled = True
        return self._path

    def apply_label_matchers(self, rules) -> dict:
        """Will apply label matchers to the expression of all alerts in all supplied groups.

        Args:
            rules: a dict of alert rules with a "groups" key. It is modified in place.

        Returns:
            The same `rules` object, with rule expressions rewritten where topology
            labels were present and cos-tool is available.
        """
        if not self.path:
            return rules
        for group in rules["groups"]:
            for rule in group.get("rules", []):
                # Rules are not required to carry labels; treat a missing or empty
                # "labels" entry as "no topology" instead of raising KeyError.
                labels = rule.get("labels") or {}
                # if the user for some reason has provided juju_unit, we'll need to honor it
                # in most cases, however, this will be empty
                topology = {
                    label: labels[label] for label in self._TOPOLOGY_LABELS if label in labels
                }
                rule["expr"] = self.inject_label_matchers(rule["expr"], topology)
        return rules

    def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]:
        """Will validate correctness of alert rules, returning a boolean and any errors.

        Args:
            rules: a dict of alert rules, written to a temporary YAML file for cos-tool.

        Returns:
            `(True, "")` when the rules are valid or cos-tool is unavailable, otherwise
            `(False, errors)` where `errors` joins the output lines that contain
            "error validating".
        """
        if not self.path:
            logger.debug("`cos-tool` unavailable. Not validating alert correctness.")
            return True, ""

        with tempfile.TemporaryDirectory() as tmpdir:
            rule_path = Path(tmpdir) / "validate_rule.yaml"
            rule_path.write_text(yaml.dump(rules))

            args = [str(self.path), "validate", str(rule_path)]
            # noinspection PyBroadException
            try:
                self._exec(args)
            except subprocess.CalledProcessError as e:
                output = e.output.decode("utf8")
                logger.debug("Validating the rules failed: %s", output)
                return False, ", ".join(
                    line for line in output.splitlines() if "error validating" in line
                )
            return True, ""

    def validate_scrape_jobs(self, jobs: list) -> bool:
        """Validate scrape jobs using cos-tool.

        Args:
            jobs: a list of scrape job dicts, wrapped in a `scrape_configs` document.

        Returns:
            True when the jobs are valid or cos-tool is unavailable.

        Raises:
            subprocess.CalledProcessError: if cos-tool rejects the configuration.
        """
        if not self.path:
            logger.debug("`cos-tool` unavailable. Not validating scrape jobs.")
            return True
        conf = {"scrape_configs": jobs}
        with tempfile.NamedTemporaryFile("w") as tmpfile:
            tmpfile.write(yaml.safe_dump(conf))
            # cos-tool reads the file by name, so the content must be on disk first.
            tmpfile.flush()
            try:
                self._exec([str(self.path), "validate-config", tmpfile.name])
            except subprocess.CalledProcessError as e:
                logger.error("Validating scrape jobs failed: %s", e.output)
                raise
        return True

    def inject_label_matchers(self, expression, topology) -> str:
        """Add label matchers to an expression.

        Args:
            expression: the query expression to transform.
            topology: a mapping of label name to value; one matcher is added per item.

        Returns:
            The transformed expression, or the original expression when `topology` is
            empty, cos-tool is unavailable, or the transformation fails.
        """
        if not topology:
            return expression
        if not self.path:
            logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression)
            return expression
        args = [str(self.path), "transform"]
        args.extend("--label-matcher={}={}".format(key, value) for key, value in topology.items())
        args.append("{}".format(expression))
        # noinspection PyBroadException
        try:
            return self._exec(args)
        except subprocess.CalledProcessError as e:
            logger.debug('Applying the expression failed: "%s", falling back to the original', e)
            return expression

    def _get_tool_path(self) -> Optional[Path]:
        """Resolve `cos-tool-<arch>` relative to the current working directory."""
        arch = platform.machine()
        arch = "amd64" if arch == "x86_64" else arch
        res = "cos-tool-{}".format(arch)
        try:
            return Path(res).resolve(strict=True)
        except OSError:
            # FileNotFoundError is a subclass of OSError.
            logger.debug('Could not locate cos-tool at: "%s"', res)
        return None

    def _exec(self, cmd) -> str:
        """Run `cmd` and return its combined stdout/stderr, stripped.

        Raises:
            subprocess.CalledProcessError: if the command exits with a non-zero status.
        """
        result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return result.stdout.decode("utf-8").strip()
+ +Import `RedisRequires` in your charm by adding the following to `src/charm.py`: +``` +from charms.redis_k8s.v0.redis import RedisRequires +``` +Define the following attributes in charm charm class for the library to be able to work with it +``` + on = RedisRelationCharmEvents() +``` +And then wherever you need to reference the relation data it will be available +in the property `relation_data`: +``` +redis_host = self.redis.relation_data.get("hostname") +redis_port = self.redis.relation_data.get("port") +``` +You will also need to add the following to `metadata.yaml`: +``` +requires: + redis: + interface: redis +``` +""" +import logging +import socket +from typing import Dict, Optional + +from ops.charm import CharmEvents +from ops.framework import EventBase, EventSource, Object + +# The unique Charmhub library identifier, never change it. +LIBID = "fe18a608cec5465fa5153e419abcad7b" + +# Increment this major API version when introducing breaking changes. +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version. 
class RedisRequires(Object):
    """A class implementing the redis requires relation."""

    def __init__(self, charm, relation_name: str = DEFAULT_REALTION_NAME):
        """Observe the relation events of `relation_name` on `charm`.

        Args:
            charm: the charm that owns the relation; its `on` attribute must provide
                the `redis_relation_updated` event.
            relation_name: the name of the relation to observe.
        """
        super().__init__(charm, relation_name)
        self.framework.observe(charm.on[relation_name].relation_joined, self._on_relation_changed)
        self.framework.observe(charm.on[relation_name].relation_changed, self._on_relation_changed)
        self.framework.observe(charm.on[relation_name].relation_broken, self._on_relation_broken)
        self.charm = charm
        self.relation_name = relation_name

    def _on_relation_changed(self, event):
        """Handle the relation changed event."""
        # Events without a unit are ignored.
        if not event.unit:
            return

        # Trigger an event that our charm can react to.
        self.charm.on.redis_relation_updated.emit()

    def _on_relation_broken(self, event):
        """Handle the relation broken event."""
        # Trigger an event that our charm can react to.
        self.charm.on.redis_relation_updated.emit()

    @property
    def app_data(self) -> Optional[Dict[str, str]]:
        """Retrieve the app data.

        Returns:
            Dict: dict containing the app data, or None when there is no relation
            (or the relation has no remote application).
        """
        relation = self.model.get_relation(self.relation_name)
        if not relation or not relation.app:
            return None
        return relation.data[relation.app]

    @property
    def relation_data(self) -> Optional[Dict[str, str]]:
        """Retrieve the relation data.

        Returns:
            Dict: dict containing the relation data of one of the remote units, or
            None when there is no relation or no remote unit yet.
        """
        relation = self.model.get_relation(self.relation_name)
        if not relation or not relation.units:
            return None
        unit = next(iter(relation.units))
        return relation.data[unit]

    @property
    def url(self) -> Optional[str]:
        """Retrieve the Redis URL.

        The host is the `leader-host` from the app data when present, otherwise the
        `hostname` published by the remote unit.

        Returns:
            str: the Redis URL, or None while the host or port is not yet known.
        """
        relation_data = self.relation_data
        if not relation_data:
            return None

        redis_host = relation_data.get("hostname")
        app_data = self.app_data
        if app_data:
            redis_host = app_data.get("leader-host", redis_host)
        redis_port = relation_data.get("port")

        # A unit databag can be non-empty before the provider has written its
        # connection details; never hand out "redis://None:None".
        if not redis_host or not redis_port:
            return None
        return f"redis://{redis_host}:{redis_port}"
def _bind_address(self, event):
    """Convenience function for getting the unit address.

    Args:
        event: a relation event; its relation name and id select the binding.

    Returns:
        The bind address of the relation's network binding, or the application
        name when no binding or bind address is available.
    """
    relation = self.model.get_relation(event.relation.name, event.relation.id)
    binding = self.model.get_binding(relation)
    if binding and (address := binding.network.bind_address):
        return address
    # This object has no `app` attribute of its own; the application is
    # reached through the model.
    return self.model.app.name
+ self.tracing = TracingEndpointRequirer(self, + protocols=['otlp_grpc', 'otlp_http', 'jaeger_thrift_http'] + ) + # ... + +Note that the first argument (`self`) to `TracingEndpointRequirer` is always a reference to the +parent charm. + +Alternatively to providing the list of requested protocols at init time, the charm can do it at +any point in time by calling the +`TracingEndpointRequirer.request_protocols(*protocol:str, relation:Optional[Relation])` method. +Using this method also allows you to use per-relation protocols. + +Units of requirer charms obtain the tempo endpoint to which they will push their traces by calling +`TracingEndpointRequirer.get_endpoint(protocol: str)`, where `protocol` is, for example: +- `otlp_grpc` +- `otlp_http` +- `zipkin` +- `tempo` + +If the `protocol` is not in the list of protocols that the charm requested at endpoint set-up time, +the library will raise an error. + +We recommend that you scale up your tracing provider and relate it to an ingress so that your tracing requests +go through the ingress and get load balanced across all units. Otherwise, if the provider's leader goes down, your tracing goes down. + +## Provider Library Usage + +The `TracingEndpointProvider` object may be used by charms to manage relations with their +trace sources. For this purpose, a Tempo-like charm needs to do two things + +1. Instantiate the `TracingEndpointProvider` object by providing it a +reference to the parent (Tempo) charm and optionally the name of the relation that the Tempo charm +uses to interact with its trace sources. This relation must conform to the `tracing` interface +and it is strongly recommended that this relation be named `tracing` which is its +default value. + +For example a Tempo charm may instantiate the `TracingEndpointProvider` in its constructor as +follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + # ...
+ self.tracing = TracingEndpointProvider(self) + # ... + + + +""" # noqa: W505 + +import enum +import json +import logging +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Literal, + MutableMapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import pydantic +from ops.charm import ( + CharmBase, + CharmEvents, + RelationBrokenEvent, + RelationEvent, + RelationRole, +) +from ops.framework import EventSource, Object +from ops.model import ModelError, Relation +from pydantic import BaseModel, Field + +# The unique Charmhub library identifier, never change it +LIBID = "d2f02b1f8d1244b5989fd55bc3a28943" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 11 + +PYDEPS = ["pydantic"] + +logger = logging.getLogger(__name__) + +DEFAULT_RELATION_NAME = "tracing" +RELATION_INTERFACE_NAME = "tracing" + +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 +ReceiverProtocol = Literal[ + "zipkin", + "otlp_grpc", + "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", +] + +RawReceiver = Tuple[ReceiverProtocol, str] +# Helper type. A raw receiver is defined as a tuple consisting of the protocol name, and the (external, if available), +# (secured, if available) resolvable server url. 
+ + +BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} + + +class TransportProtocolType(str, enum.Enum): + """Receiver Type.""" + + http = "http" + grpc = "grpc" + + +receiver_protocol_to_transport_protocol: Dict[ + ReceiverProtocol, TransportProtocolType +] = { + "zipkin": TransportProtocolType.http, + "otlp_grpc": TransportProtocolType.grpc, + "otlp_http": TransportProtocolType.http, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, +} +# A mapping between telemetry protocols and their corresponding transport protocol. + + +class TracingError(Exception): + """Base class for custom errors raised by this library.""" + + +class NotReadyError(TracingError): + """Raised by the provider wrapper if a requirer hasn't published the required data (yet).""" + + +class ProtocolNotRequestedError(TracingError): + """Raised if the user attempts to obtain an endpoint for a protocol it did not request.""" + + +class DataValidationError(TracingError): + """Raised when data validation fails on IPU relation data.""" + + +class DataAccessPermissionError(TracingError): + """Raised when follower units attempt leader-only operations.""" + + +class AmbiguousRelationUsageError(TracingError): + """Raised when one wrongly assumes that there can only be one relation on an endpoint.""" + + +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + class Config: + """Pydantic config.""" + + # ignore any extra fields in the databag + extra = "ignore" + """Ignore any extra fields in the databag.""" + allow_population_by_field_name = True + """Allow instantiating this class by field name (instead of forcing alias).""" + + _NEST_UNDER = None + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + if cls._NEST_UNDER: + return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) + + try: + data = { + k: 
json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {f.alias for f in cls.__fields__.values()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.parse_raw(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + + if self._NEST_UNDER: + databag[self._NEST_UNDER] = self.json(by_alias=True) + return databag + + dct = self.dict() + for key, field in self.__fields__.items(): # type: ignore + value = dct[key] + databag[field.alias or key] = json.dumps(value) + + return databag + +else: + from pydantic import ConfigDict + + class DatabagModel(BaseModel): + """Base databag model.""" + + model_config = ConfigDict( + # ignore any extra fields in the databag + extra="ignore", + # Allow instantiating this class by field name (instead of forcing alias). + populate_by_name=True, + # Custom config key: whether to nest the whole datastructure (as json) + # under a field or spread it out at the toplevel. 
+ _NEST_UNDER=None, # type: ignore + ) + """Pydantic config.""" + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + nest_under = cls.model_config.get("_NEST_UNDER") # type: ignore + if nest_under: + return cls.model_validate(json.loads(databag[nest_under])) # type: ignore + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {(f.alias or n) for n, f in cls.__fields__.items()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.model_validate_json(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. 
+ """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + nest_under = self.model_config.get("_NEST_UNDER") + if nest_under: + databag[nest_under] = self.model_dump_json( # type: ignore + by_alias=True, + # skip keys whose values are default + exclude_defaults=True, + ) + return databag + + dct = self.model_dump() # type: ignore + for key, field in self.model_fields.items(): # type: ignore + value = dct[key] + if value == field.default: + continue + databag[field.alias or key] = json.dumps(value) + + return databag + + +# todo use models from charm-relation-interfaces +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class ProtocolType(BaseModel): # type: ignore + """Protocol Type.""" + + class Config: + """Pydantic config.""" + + use_enum_values = True + """Allow serializing enum values.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + +else: + + class ProtocolType(BaseModel): + """Protocol Type.""" + + model_config = ConfigDict( # type: ignore + # Allow serializing enum values. + use_enum_values=True + ) + """Pydantic config.""" + + name: str = Field( + ..., + description="Receiver protocol name. 
What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + + +class Receiver(BaseModel): + """Specification of an active receiver.""" + + protocol: ProtocolType = Field(..., description="Receiver protocol name and type.") + url: str = Field( + ..., + description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL. + Otherwise, it would be the service's fqdn or internal IP. + If the protocol type is grpc, the url will not contain a scheme.""", + examples=[ + "http://traefik_address:2331", + "https://traefik_address:2331", + "http://tempo_public_ip:2331", + "https://tempo_public_ip:2331", + "tempo_public_ip:2331", + ], + ) + + +class TracingProviderAppData(DatabagModel): # noqa: D101 # type: ignore + """Application databag model for the tracing provider.""" + + receivers: List[Receiver] = Field( + ..., + description="List of all receivers enabled on the tracing provider.", + ) + + +class TracingRequirerAppData(DatabagModel): # noqa: D101 # type: ignore + """Application databag model for the tracing requirer.""" + + receivers: List[ReceiverProtocol] + """Requested receivers.""" + + +class _AutoSnapshotEvent(RelationEvent): + __args__: Tuple[str, ...] 
= () + __optional_kwargs__: Dict[str, Any] = {} + + @classmethod + def __attrs__(cls): + return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) + + def __init__(self, handle, relation, *args, **kwargs): + super().__init__(handle, relation) + + if not len(self.__args__) == len(args): + raise TypeError( + "expected {} args, got {}".format(len(self.__args__), len(args)) + ) + + for attr, obj in zip(self.__args__, args): + setattr(self, attr, obj) + for attr, default in self.__optional_kwargs__.items(): + obj = kwargs.get(attr, default) + setattr(self, attr, obj) + + def snapshot(self) -> dict: + dct = super().snapshot() + for attr in self.__attrs__(): + obj = getattr(self, attr) + try: + dct[attr] = obj + except ValueError as e: + raise ValueError( + "cannot automagically serialize {}: " + "override this method and do it " + "manually.".format(obj) + ) from e + + return dct + + def restore(self, snapshot: dict) -> None: + super().restore(snapshot) + for attr, obj in snapshot.items(): + setattr(self, attr, obj) + + +class RelationNotFoundError(Exception): + """Raised if no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has an unexpected interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + 
"""Raised if the relation with the given name has a different role than expected.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = ( + "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + ) + + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Validate a relation. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. 
+ """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + # fixme: why do we need to cast here? + actual_relation_interface = cast(str, relation.interface_name) + + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role is RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role is RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise TypeError( + "Unexpected RelationDirection: {}".format(expected_relation_role) + ) + + +class RequestEvent(RelationEvent): + """Event emitted when a remote requests a tracing endpoint.""" + + @property + def requested_receivers(self) -> List[ReceiverProtocol]: + """List of receiver protocols that have been requested.""" + relation = self.relation + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + return TracingRequirerAppData.load(relation.data[app]).receivers + + +class BrokenEvent(RelationBrokenEvent): + """Event emitted when a relation on tracing is broken.""" + + +class TracingEndpointProviderEvents(CharmEvents): + """TracingEndpointProvider events.""" + + request = EventSource(RequestEvent) + broken = EventSource(BrokenEvent) + + +class TracingEndpointProvider(Object): + """Class representing a trace receiver service.""" + + on = TracingEndpointProviderEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + external_url: Optional[str] = None, + relation_name: str = DEFAULT_RELATION_NAME, + ): + """Initialize. 
+ + Args: + charm: a `CharmBase` instance that manages this instance of the Tempo service. + external_url: external address of the node hosting the tempo server, + if an ingress is present. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + super().__init__(charm, relation_name + "tracing-provider") + self._charm = charm + self._external_url = external_url + self._relation_name = relation_name + self.framework.observe( + self._charm.on[relation_name].relation_joined, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_created, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_changed, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_broken, + self._on_relation_broken_event, + ) + + def _on_relation_broken_event(self, e: RelationBrokenEvent): + """Handle relation broken events.""" + self.on.broken.emit(e.relation) + + def _on_relation_event(self, e: RelationEvent): + """Handle relation created/joined/changed events.""" + if self.is_requirer_ready(e.relation): + self.on.request.emit(e.relation) + + def is_requirer_ready(self, relation: Relation): + """Attempt to determine if requirer has already populated app data.""" + try: + 
self._get_requested_protocols(relation) + except NotReadyError: + return False + return True + + @staticmethod + def _get_requested_protocols(relation: Relation): + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + try: + databag = TracingRequirerAppData.load(relation.data[app]) + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info("relation %s is not ready to talk tracing", relation) + raise NotReadyError() + return databag.receivers + + def requested_protocols(self): + """All receiver protocols that have been requested by our related apps.""" + requested_protocols = set() + for relation in self.relations: + try: + protocols = self._get_requested_protocols(relation) + except NotReadyError: + continue + requested_protocols.update(protocols) + return requested_protocols + + @property + def relations(self) -> List[Relation]: + """All relations active on this endpoint.""" + return self._charm.model.relations[self._relation_name] + + def publish_receivers(self, receivers: Sequence[RawReceiver]): + """Let all requirers know that these receivers are active and listening.""" + if not self._charm.unit.is_leader(): + raise RuntimeError("only leader can do this") + + for relation in self.relations: + try: + TracingProviderAppData( + receivers=[ + Receiver( + url=url, + protocol=ProtocolType( + name=protocol, + type=receiver_protocol_to_transport_protocol[protocol], + ), + ) + for protocol, url in receivers + ], + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + "encountered error %s while attempting to update_relation_data." 
+ "The relation must be gone.", + e, + ) + continue + raise + + +class EndpointRemovedEvent(RelationBrokenEvent): + """Event representing a change in one of the receiver endpoints.""" + + +class EndpointChangedEvent(_AutoSnapshotEvent): + """Event representing a change in one of the receiver endpoints.""" + + __args__ = ("_receivers",) + + if TYPE_CHECKING: + _receivers = [] # type: List[dict] + + @property + def receivers(self) -> List[Receiver]: + """Cast receivers back from dict.""" + return [Receiver(**i) for i in self._receivers] + + +class TracingEndpointRequirerEvents(CharmEvents): + """TracingEndpointRequirer events.""" + + endpoint_changed = EventSource(EndpointChangedEvent) + endpoint_removed = EventSource(EndpointRemovedEvent) + + +class TracingEndpointRequirer(Object): + """A tracing endpoint for Tempo.""" + + on = TracingEndpointRequirerEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + protocols: Optional[List[ReceiverProtocol]] = None, + ): + """Construct a tracing requirer for a Tempo charm. + + If your application supports pushing traces to a distributed tracing backend, the + `TracingEndpointRequirer` object enables your charm to easily access endpoint information + exchanged over a `tracing` relation interface. + + Args: + charm: a `CharmBase` object that manages this + `TracingEndpointRequirer` object. Typically, this is `self` in the instantiating + class. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". It is strongly + advised not to change the default, so that people deploying your charm will have a + consistent experience with all other charms that provide tracing endpoints. + protocols: optional list of protocols that the charm intends to send traces with. 
+ The provider will enable receivers for these and only these protocols, + so be sure to enable all protocols the charm or its workload are going to need. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + + self._is_single_endpoint = charm.meta.relations[relation_name].limit == 1 + + self._charm = charm + self._relation_name = relation_name + + events = self._charm.on[self._relation_name] + self.framework.observe( + events.relation_changed, self._on_tracing_relation_changed + ) + self.framework.observe(events.relation_broken, self._on_tracing_relation_broken) + + if protocols and self._charm.unit.is_leader(): + # we can't be sure that the current event context supports read/writing relation data for this relation, + # so we catch ModelErrors. This is because we're doing this in init. + try: + self.request_protocols(protocols) + except ModelError as e: + logger.error( + "encountered error %s while attempting to request_protocols." + "The relation must be gone.", + e, + ) + pass + + def request_protocols( + self, protocols: Sequence[ReceiverProtocol], relation: Optional[Relation] = None + ): + """Publish the list of protocols which the provider should activate.""" + # todo: should we check if _is_single_endpoint and len(self.relations) > 1 and raise, here? 
+ relations = [relation] if relation else self.relations + + if not protocols: + # empty sequence + raise ValueError( + "You need to pass a nonempty sequence of protocols to `request_protocols`." + ) + + if self._charm.unit.is_leader(): + for relation in relations: + TracingRequirerAppData( + receivers=list(protocols), + ).dump(relation.data[self._charm.app]) + else: + raise DataAccessPermissionError("only leaders can request_protocols") + + @property + def relations(self) -> List[Relation]: + """The tracing relations associated with this endpoint.""" + return self._charm.model.relations[self._relation_name] + + @property + def _relation(self) -> Optional[Relation]: + """If this wraps a single endpoint, the relation bound to it, if any.""" + if not self._is_single_endpoint: + objname = type(self).__name__ + raise AmbiguousRelationUsageError( + f"This {objname} wraps a {self._relation_name} endpoint that has " + "limit != 1. We can't determine what relation, of the possibly many, you are " + f"talking about. Please pass a relation instance while calling {objname}, " + "or set limit=1 in the charm metadata." 
+ ) + relations = self.relations + return relations[0] if relations else None + + def is_ready(self, relation: Optional[Relation] = None): + """Is this endpoint ready?""" + relation = relation or self._relation + if not relation: + logger.debug("no relation on %r: tracing not ready", self._relation_name) + return False + if relation.data is None: + logger.error("relation data is None for %s", relation) + return False + if not relation.app: + logger.error("%s event received but there is no relation.app", relation) + return False + try: + databag = dict(relation.data[relation.app]) + TracingProviderAppData.load(databag) + + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info("failed validating relation data for %s", relation) + return False + return True + + def _on_tracing_relation_changed(self, event): + """Notify the providers that there is new endpoint information available.""" + relation = event.relation + if not self.is_ready(relation): + self.on.endpoint_removed.emit(relation) # type: ignore + return + + data = TracingProviderAppData.load(relation.data[relation.app]) + self.on.endpoint_changed.emit(relation, [i.dict() for i in data.receivers]) # type: ignore + + def _on_tracing_relation_broken(self, event: RelationBrokenEvent): + """Notify the providers that the endpoint is broken.""" + relation = event.relation + self.on.endpoint_removed.emit(relation) # type: ignore + + def get_all_endpoints( + self, relation: Optional[Relation] = None + ) -> Optional[TracingProviderAppData]: + """Unmarshalled relation data.""" + relation = relation or self._relation + if not self.is_ready(relation): + return + return TracingProviderAppData.load(relation.data[relation.app]) # type: ignore + + def _get_endpoint( + self, relation: Optional[Relation], protocol: ReceiverProtocol + ) -> Optional[str]: + app_data = self.get_all_endpoints(relation) + if not app_data: + return None + receivers: List[Receiver] = list( + filter(lambda i: 
i.protocol.name == protocol, app_data.receivers) + ) + if not receivers: + # it can happen if the charm requests tracing protocols, but the relay (such as grafana-agent) isn't yet + # connected to the tracing backend. In this case, it's not an error the charm author can do anything about + logger.warning("no receiver found with protocol=%r.", protocol) + return + if len(receivers) > 1: + # if we have more than 1 receiver that matches, it shouldn't matter which receiver we'll be using. + logger.warning( + "too many receivers with protocol=%r; using first one. Found: %s", + protocol, + receivers, + ) + + receiver = receivers[0] + return receiver.url + + def get_endpoint( + self, protocol: ReceiverProtocol, relation: Optional[Relation] = None + ) -> Optional[str]: + """Receiver endpoint for the given protocol. + + It could happen that this function gets called before the provider publishes the endpoints. + In such a scenario, if a non-leader unit calls this function, a permission denied exception will be raised due to + restricted access. To prevent this, this function needs to be guarded by the `is_ready` check. + + Raises: + ProtocolNotRequestedError: + If the charm unit is the leader unit and attempts to obtain an endpoint for a protocol it did not request. 
+ """ + endpoint = self._get_endpoint(relation or self._relation, protocol=protocol) + if not endpoint: + requested_protocols = set() + relations = [relation] if relation else self.relations + for relation in relations: + try: + databag = TracingRequirerAppData.load( + relation.data[self._charm.app] + ) + except DataValidationError: + continue + + requested_protocols.update(databag.receivers) + + if protocol not in requested_protocols: + raise ProtocolNotRequestedError(protocol, relation) + + return None + return endpoint + + +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Return the charm_tracing config you likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + >>> from lib.charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm + >>> from lib.charms.tempo_coordinator_k8s.v0.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + """ + if not endpoint_requirer.is_ready(): + return None, None + + try: + endpoint = endpoint_requirer.get_endpoint("otlp_http") + except ModelError as e: + if e.args[0] == "ERROR permission denied\n": + # this can happen the app databag doesn't have data, + # or we're breaking the relation. 
+ return None, None + raise + + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None or not Path(cert_path).exists(): + # disable charm tracing until we obtain a cert to prevent tls errors + logger.error( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr. " + "This might also mean that the tracing provider is related to a " + "certificates provider, but this application is not (yet). " + "In that case, you might just have to wait a bit for the certificates " + "integration to settle. " + ) + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/charms/garm-operator/lib/charms/traefik_k8s/v2/ingress.py b/charms/garm-operator/lib/charms/traefik_k8s/v2/ingress.py new file mode 100644 index 00000000..4d03c786 --- /dev/null +++ b/charms/garm-operator/lib/charms/traefik_k8s/v2/ingress.py @@ -0,0 +1,949 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +r"""# Interface Library for ingress. + +This library wraps relation endpoints using the `ingress` interface +and provides a Python API for both requesting and providing per-application +ingress, with load-balancing occurring across all units. + +## Getting Started + +To get started using the library, you just need to fetch the library using `charmcraft`. + +```shell +cd some-charm +charmcraft fetch-lib charms.traefik_k8s.v2.ingress +``` + +In the `metadata.yaml` of the charm, add the following: + +```yaml +requires: + ingress: + interface: ingress + limit: 1 +``` + +Then, to initialise the library: + +```python +from charms.traefik_k8s.v2.ingress import (IngressPerAppRequirer, + IngressPerAppReadyEvent, IngressPerAppRevokedEvent) + +class SomeCharm(CharmBase): + def __init__(self, *args): + # ... 
+ self.ingress = IngressPerAppRequirer(self, port=80) + # The following event is triggered when the ingress URL to be used + # by this deployment of the `SomeCharm` is ready (or changes). + self.framework.observe( + self.ingress.on.ready, self._on_ingress_ready + ) + self.framework.observe( + self.ingress.on.revoked, self._on_ingress_revoked + ) + + def _on_ingress_ready(self, event: IngressPerAppReadyEvent): + logger.info("This app's ingress URL: %s", event.url) + + def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): + logger.info("This app no longer has ingress") +""" + +import ipaddress +import json +import logging +import socket +import typing +from dataclasses import dataclass +from functools import partial +from typing import ( + Any, + Callable, + Dict, + List, + MutableMapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import pydantic +from ops import EventBase +from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent +from ops.framework import EventSource, Object, ObjectEvents, StoredState +from ops.model import ModelError, Relation, Unit +from pydantic import AnyHttpUrl, BaseModel, Field + +# The unique Charmhub library identifier, never change it +LIBID = "e6de2a5cd5b34422a204668f3b8f90d2" + +# Increment this major API version when introducing breaking changes +LIBAPI = 2 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 20 + +PYDEPS = ["pydantic"] + +DEFAULT_RELATION_NAME = "ingress" +RELATION_INTERFACE = "ingress" + +log = logging.getLogger(__name__) +BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} + +PYDANTIC_IS_V1 = int(pydantic.version.VERSION.split(".")[0]) < 2 +if PYDANTIC_IS_V1: # noqa + from pydantic import validator + + input_validator = partial(validator, pre=True) + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + class Config: + """Pydantic config.""" + + 
allow_population_by_field_name = True + """Allow instantiating this class by field name (instead of forcing alias).""" + + _NEST_UNDER = None + + # Annotating -> "DatabagModel" as the return type here doesn't sit well with pyright + # We are disabling this line for now and come back to it later. + @classmethod + def load(cls, databag: MutableMapping): # type: ignore[no-untyped-def] + """Load this model from a Juju databag.""" + if cls._NEST_UNDER: + return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {f.alias for f in cls.__fields__.values()} # type: ignore + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + log.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.parse_raw(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + log.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True) -> Any: + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. 
+ """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + + if self._NEST_UNDER: + databag[self._NEST_UNDER] = self.json(by_alias=True, exclude_defaults=True) + return databag + + for key, value in self.dict(by_alias=True, exclude_defaults=True).items(): # type: ignore # noqa + databag[key] = json.dumps(value) + + return databag + +else: + from pydantic import ConfigDict, field_validator + + input_validator = partial(field_validator, mode="before") # type: ignore + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + model_config = ConfigDict( + # tolerate additional keys in databag + extra="ignore", + # Allow instantiating this class by field name (instead of forcing alias). + populate_by_name=True, + # Custom config key: whether to nest the whole datastructure (as json) + # under a field or spread it out at the toplevel. + _NEST_UNDER=None, + ) # type: ignore + """Pydantic config.""" + + # Annotating -> "DatabagModel" as the return type here doesn't sit well with pyright + # We are disabling this line for now and come back to it later. + @classmethod + def load(cls, databag: MutableMapping): # type: ignore[no-untyped-def] + """Load this model from a Juju databag.""" + nest_under = cls.model_config.get("_NEST_UNDER") + if nest_under: + return cls.model_validate(json.loads(databag[nest_under])) # type: ignore + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {(f.alias or n) for n, f in cls.model_fields.items()} # type: ignore + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. 
{databag}" + log.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.model_validate_json(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + log.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True) -> Any: + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + nest_under = self.model_config.get("_NEST_UNDER") + if nest_under: + databag[nest_under] = self.model_dump_json( # type: ignore + by_alias=True, + # skip keys whose values are default + exclude_defaults=True, + ) + return databag + + dct = self.model_dump( + mode="json", + by_alias=True, + exclude_defaults=True, # type: ignore + ) + databag.update({k: json.dumps(v) for k, v in dct.items()}) + return databag + + +# todo: import these models from charm-relation-interfaces/ingress/v2 instead of redeclaring them +class IngressUrl(BaseModel): + """Ingress url schema.""" + + url: AnyHttpUrl + + +class IngressProviderAppData(DatabagModel): + """Ingress application databag schema.""" + + ingress: Optional[IngressUrl] = None + + +class ProviderSchema(BaseModel): + """Provider schema for Ingress.""" + + app: IngressProviderAppData + + +class IngressHealthCheck(BaseModel): + """HealthCheck schema for Ingress.""" + + path: str = Field(description="The health check endpoint path (required).") + scheme: Optional[str] = Field( + default=None, description="Replaces the server URL scheme for the health check endpoint." + ) + hostname: Optional[str] = Field( + default=None, description="Hostname to be set in the health check request." 
+ ) + port: Optional[int] = Field( + default=None, description="Replaces the server URL port for the health check endpoint." + ) + interval: str = Field(default="30s", description="Frequency of the health check calls.") + timeout: str = Field(default="5s", description="Maximum duration for a health check request.") + + +class IngressRequirerAppData(DatabagModel): + """Ingress requirer application databag model.""" + + model: str = Field(description="The model the application is in.") + name: str = Field(description="the name of the app requesting ingress.") + port: int = Field(description="The port the app wishes to be exposed.") + healthcheck_params: Optional[IngressHealthCheck] = Field( + default=None, description="Optional health check configuration for ingress." + ) + + # fields on top of vanilla 'ingress' interface: + strip_prefix: Optional[bool] = Field( + default=False, + description="Whether to strip the prefix from the ingress url.", + alias="strip-prefix", + ) + redirect_https: Optional[bool] = Field( + default=False, + description="Whether to redirect http traffic to https.", + alias="redirect-https", + ) + + scheme: Optional[str] = Field( + default="http", description="What scheme to use in the generated ingress url" + ) + + # pydantic wants 'cls' as first arg + @input_validator("scheme") + def validate_scheme(cls, scheme: str) -> str: # noqa: N805 + """Validate scheme arg.""" + if scheme not in {"http", "https", "h2c"}: + raise ValueError("invalid scheme: should be one of `http|https|h2c`") + return scheme + + # pydantic wants 'cls' as first arg + @input_validator("port") + def validate_port(cls, port: int) -> int: # noqa: N805 + """Validate port.""" + assert isinstance(port, int), type(port) + assert 0 < port < 65535, "port out of TCP range" + return port + + +class IngressRequirerUnitData(DatabagModel): + """Ingress requirer unit databag model.""" + + host: str = Field(description="Hostname at which the unit is reachable.") + ip: Optional[str] = 
Field( + None, + description="IP at which the unit is reachable, " + "IP can only be None if the IP information can't be retrieved from juju.", + ) + + # pydantic wants 'cls' as first arg + @input_validator("host") + def validate_host(cls, host: str) -> str: # noqa: N805 + """Validate host.""" + assert isinstance(host, str), type(host) + return host + + # pydantic wants 'cls' as first arg + @input_validator("ip") + def validate_ip(cls, ip: str) -> Optional[str]: # noqa: N805 + """Validate ip.""" + if ip is None: + return None + if not isinstance(ip, str): + raise TypeError(f"got ip of type {type(ip)} instead of expected str") + try: + ipaddress.IPv4Address(ip) + return ip + except ipaddress.AddressValueError: + pass + try: + ipaddress.IPv6Address(ip) + return ip + except ipaddress.AddressValueError: + raise ValueError(f"{ip!r} is not a valid ip address") + + +class RequirerSchema(BaseModel): + """Requirer schema for Ingress.""" + + app: IngressRequirerAppData + unit: IngressRequirerUnitData + + +class IngressError(RuntimeError): + """Base class for custom errors raised by this library.""" + + +class NotReadyError(IngressError): + """Raised when a relation is not ready.""" + + +class DataValidationError(IngressError): + """Raised when data validation fails on IPU relation data.""" + + +class _IngressPerAppBase(Object): + """Base class for IngressPerUnit interface classes.""" + + def __init__(self, charm: CharmBase, relation_name: str = DEFAULT_RELATION_NAME): + super().__init__(charm, relation_name) + + self.charm: CharmBase = charm + self.relation_name = relation_name + self.app = self.charm.app + self.unit = self.charm.unit + + observe = self.framework.observe + rel_events = charm.on[relation_name] + observe(rel_events.relation_changed, self._handle_relation) + observe(rel_events.relation_departed, self._handle_relation) + observe(rel_events.relation_broken, self._handle_relation_broken) + observe(charm.on.leader_elected, self._handle_upgrade_or_leader) # type: 
ignore + observe(charm.on.upgrade_charm, self._handle_upgrade_or_leader) # type: ignore + + @property + def relations(self) -> List[Relation]: + """The list of Relation instances associated with this endpoint.""" + return list(self.charm.model.relations[self.relation_name]) + + def _handle_relation(self, event: RelationEvent) -> None: + """Subclasses should implement this method to handle a relation update.""" + pass + + def _handle_relation_broken(self, event: RelationEvent) -> None: + """Subclasses should implement this method to handle a relation breaking.""" + pass + + def _handle_upgrade_or_leader(self, event: EventBase) -> None: + """Subclasses should implement this method to handle upgrades or leadership change.""" + pass + + +class _IPAEvent(RelationEvent): + __args__: Tuple[str, ...] = () + __optional_kwargs__: Dict[str, Any] = {} + + @classmethod + def __attrs__(cls): # type: ignore + return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) + + def __init__(self, handle, relation, *args, **kwargs): # type: ignore + super().__init__(handle, relation) + + if not len(self.__args__) == len(args): + raise TypeError("expected {} args, got {}".format(len(self.__args__), len(args))) + + for attr, obj in zip(self.__args__, args): + setattr(self, attr, obj) + for attr, default in self.__optional_kwargs__.items(): + obj = kwargs.get(attr, default) + setattr(self, attr, obj) + + def snapshot(self) -> Dict[str, Any]: + dct = super().snapshot() + for attr in self.__attrs__(): + obj = getattr(self, attr) + try: + dct[attr] = obj + except ValueError as e: + raise ValueError( + "cannot automagically serialize {}: " + "override this method and do it " + "manually.".format(obj) + ) from e + + return dct + + def restore(self, snapshot: Any) -> None: + super().restore(snapshot) + for attr, obj in snapshot.items(): + setattr(self, attr, obj) + + +class IngressPerAppDataProvidedEvent(_IPAEvent): + """Event representing that ingress data has been provided for an app.""" + + 
__args__ = ("name", "model", "hosts", "strip_prefix", "redirect_https") + + if typing.TYPE_CHECKING: + name: Optional[str] = None + model: Optional[str] = None + # sequence of hostname, port dicts + hosts: Sequence["IngressRequirerUnitData"] = () + strip_prefix: bool = False + redirect_https: bool = False + + +class IngressPerAppDataRemovedEvent(RelationEvent): + """Event representing that ingress data has been removed for an app.""" + + +class IngressPerAppEndpointsUpdatedEvent(RelationEvent): + """Event representing that the proxied endpoints have been updated.""" + + +class IngressPerAppProviderEvents(ObjectEvents): + """Container for IPA Provider events.""" + + data_provided = EventSource(IngressPerAppDataProvidedEvent) + data_removed = EventSource(IngressPerAppDataRemovedEvent) + endpoints_updated = EventSource(IngressPerAppEndpointsUpdatedEvent) + + +@dataclass +class IngressRequirerData: + """Data exposed by the ingress requirer to the provider.""" + + app: "IngressRequirerAppData" + units: List["IngressRequirerUnitData"] + + +class IngressPerAppProvider(_IngressPerAppBase): + """Implementation of the provider of ingress.""" + + on = IngressPerAppProviderEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + ): + """Constructor for IngressPerAppProvider. + + Args: + charm: The charm that is instantiating the instance. + relation_name: The name of the relation endpoint to bind to + (defaults to "ingress"). + """ + super().__init__(charm, relation_name) + + def _handle_relation(self, event: RelationEvent) -> None: + # created, joined or changed: if remote side has sent the required data: + # notify listeners. 
+ if self.is_ready(event.relation): + data = self.get_data(event.relation) + self.on.data_provided.emit( # type: ignore + event.relation, + data.app.name, + data.app.model, + [ + unit.dict() if PYDANTIC_IS_V1 else unit.model_dump(mode="json") + for unit in data.units + ], + data.app.strip_prefix or False, + data.app.redirect_https or False, + ) + + def _handle_relation_broken(self, event: RelationEvent) -> None: + self.on.data_removed.emit(event.relation, event.relation.app) # type: ignore + + def wipe_ingress_data(self, relation: Relation) -> None: + """Clear ingress data from relation.""" + assert self.unit.is_leader(), "only leaders can do this" + try: + relation.data + except ModelError as e: + log.warning( + "error {} accessing relation data for {!r}. " + "Probably a ghost of a dead relation is still " + "lingering around.".format(e, relation.name) + ) + return + del relation.data[self.app]["ingress"] + self.on.endpoints_updated.emit(relation=relation, app=relation.app) + + def _get_requirer_units_data(self, relation: Relation) -> List["IngressRequirerUnitData"]: + """Fetch and validate the requirer's unit databag.""" + out: List["IngressRequirerUnitData"] = [] + + unit: Unit + for unit in relation.units: + databag = relation.data[unit] + try: + data = IngressRequirerUnitData.load(databag) + out.append(cast(IngressRequirerUnitData, data)) + except pydantic.ValidationError: + log.info(f"failed to validate remote unit data for {unit}") + raise + return out + + @staticmethod + def _get_requirer_app_data(relation: Relation) -> "IngressRequirerAppData": + """Fetch and validate the requirer's app databag.""" + app = relation.app + if app is None: + raise NotReadyError(relation) + + databag = relation.data[app] + return cast(IngressRequirerAppData, IngressRequirerAppData.load(databag)) + + def get_data(self, relation: Relation) -> IngressRequirerData: + """Fetch the remote (requirer) app and units' databags.""" + try: + return IngressRequirerData( + 
self._get_requirer_app_data(relation), self._get_requirer_units_data(relation) + ) + except (pydantic.ValidationError, DataValidationError) as e: + raise DataValidationError( + "failed to validate ingress requirer data: %s" % str(e) + ) from e + + def is_ready(self, relation: Optional[Relation] = None) -> bool: + """The Provider is ready if the requirer has sent valid data.""" + if not relation: + return any(map(self.is_ready, self.relations)) + + try: + self.get_data(relation) + except (DataValidationError, NotReadyError) as e: + log.info("Provider not ready; validation error encountered: %s" % str(e)) + return False + return True + + def _published_url(self, relation: Relation) -> Optional["IngressProviderAppData"]: + """Fetch and validate this app databag; return the ingress url.""" + if not self.is_ready(relation) or not self.unit.is_leader(): + # Handle edge case where remote app name can be missing, e.g., + # relation_broken events. + # Also, only leader units can read own app databags. + # FIXME https://github.com/canonical/traefik-k8s-operator/issues/34 + return None + + # fetch the provider's app databag + databag = relation.data[self.app] + if not databag.get("ingress"): + raise NotReadyError("This application did not `publish_url` yet.") + + return IngressProviderAppData.load(databag) + + def publish_url(self, relation: Relation, url: str) -> None: + """Publish to the app databag the ingress url.""" + ingress_url = {"url": url} + try: + IngressProviderAppData(ingress=ingress_url).dump(relation.data[self.app]) # type: ignore + self.on.endpoints_updated.emit(relation=relation, app=relation.app) + except pydantic.ValidationError as e: + # If we cannot validate the url as valid, publish an empty databag and log the error. + log.error(f"Failed to validate ingress url '{url}' - got ValidationError {e}") + log.error( + ( + f"url was not published to ingress relation for {relation.app}." 
+ f"This error is likely due to an error or misconfiguration of the" + "charm calling this library." + ) + ) + IngressProviderAppData(ingress=None).dump(relation.data[self.app]) # type: ignore + + @property + def proxied_endpoints(self) -> Dict[str, Dict[str, str]]: + """Returns the ingress settings provided to applications by this IngressPerAppProvider. + + For example, when this IngressPerAppProvider has provided the + `http://foo.bar/my-model.my-app` URL to the my-app application, the returned dictionary + will be: + + ``` + { + "my-app": { + "url": "http://foo.bar/my-model.my-app" + } + } + ``` + """ + results: Dict[str, Dict[str, str]] = {} + + for ingress_relation in self.relations: + if not ingress_relation.app: + log.warning( + ( + f"no app in relation {ingress_relation} when fetching proxied endpoints:" + "skipping" + ) + ) + continue + try: + ingress_data = self._published_url(ingress_relation) + except NotReadyError: + log.warning( + f"no published url found in {ingress_relation}: " + f"traefik didn't publish_url yet to this relation." + ) + continue + + if not ingress_data: + log.warning(f"relation {ingress_relation} not ready yet: try again in some time.") + continue + + # Validation above means ingress cannot be None, but type checker doesn't know that. 
+ ingress = cast(IngressProviderAppData, ingress_data.ingress) + if PYDANTIC_IS_V1: + results[ingress_relation.app.name] = ingress.dict() + else: + results[ingress_relation.app.name] = ingress.model_dump(mode="json") + return results + + +class IngressPerAppReadyEvent(_IPAEvent): + """Event representing that ingress for an app is ready.""" + + __args__ = ("url",) + if typing.TYPE_CHECKING: + url: Optional[str] = None + + +class IngressPerAppRevokedEvent(RelationEvent): + """Event representing that ingress for an app has been revoked.""" + + +class IngressPerAppRequirerEvents(ObjectEvents): + """Container for IPA Requirer events.""" + + ready = EventSource(IngressPerAppReadyEvent) + revoked = EventSource(IngressPerAppRevokedEvent) + + +class IngressPerAppRequirer(_IngressPerAppBase): + """Implementation of the requirer of the ingress relation.""" + + on = IngressPerAppRequirerEvents() # type: ignore + + # used to prevent spurious urls to be sent out if the event we're currently + # handling is a relation-broken one. + _stored = StoredState() + _auto_data: Optional[Tuple[Optional[str], Optional[str], int]] + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + *, + host: Optional[str] = None, + ip: Optional[str] = None, + port: Optional[int] = None, + strip_prefix: bool = False, + redirect_https: bool = False, + # fixme: this is horrible UX. + # shall we switch to manually calling provide_ingress_requirements with all args when + # ready? + scheme: Union[Callable[[], str], str] = lambda: "http", + healthcheck_params: Optional[Dict[str, Any]] = None, + ): + """Constructor for IngressRequirer. + + The request args can be used to specify the ingress properties when the + instance is created. If any are set, at least `port` is required, and + they will be sent to the ingress provider as soon as it is available. + All request args must be given as keyword args. + + Args: + charm: The charm that is instantiating the library. 
+ relation_name: The name of the relation endpoint to bind to (defaults to "ingress"); + the relation must be of interface type "ingress" and have a limit of 1. + host: Hostname to be used by the ingress provider to address the requiring + application; if unspecified, the default Kubernetes service name will be used. + ip: Alternative addressing method other than host to be used by the ingress provider; + if unspecified, the binding address from the Juju network API will be used. + healthcheck_params: Optional dictionary containing health check + configuration parameters conforming to the IngressHealthCheck schema. + The dictionary must include: + - "path" (str): The health check endpoint path (required). + It may also include: + - "scheme" (Optional[str]): Replaces the server URL scheme for the health check + endpoint. + - "hostname" (Optional[str]): Hostname to be set in the health check request. + - "port" (Optional[int]): Replaces the server URL port for the health check + endpoint. + - "interval" (str): Frequency of the health check calls + (defaults to "30s" if omitted). + - "timeout" (str): Maximum duration for a health check request + (defaults to "5s" if omitted). + If provided, "path" is required while "interval" and "timeout" will use Traefik's + defaults when not specified. + strip_prefix: Configure Traefik to strip the path prefix. + redirect_https: Redirect incoming requests to HTTPS. + scheme: Either a callable that returns the scheme to use when constructing the ingress + URL, or a string if the scheme is known and stable at charm initialization. 
+ + Request Args: + port: the port of the service + """ + super().__init__(charm, relation_name) + self.charm: CharmBase = charm + self.healthcheck_params = healthcheck_params + self.relation_name = relation_name + self._strip_prefix = strip_prefix + self._redirect_https = redirect_https + self._get_scheme = scheme if callable(scheme) else lambda: scheme + + self._stored.set_default(current_url=None) # type: ignore + + # if instantiated with a port, and we are related, then + # we immediately publish our ingress data to speed up the process. + if port: + self._auto_data = host, ip, port + else: + self._auto_data = None + + def _handle_relation(self, event: RelationEvent) -> None: + # created, joined or changed: if we have auto data: publish it + self._publish_auto_data() + if self.is_ready(): + # Avoid spurious events, emit only when there is a NEW URL available + new_url = ( + None + if isinstance(event, RelationBrokenEvent) + else self._get_url_from_relation_data() + ) + if self._stored.current_url != new_url: # type: ignore + self._stored.current_url = new_url # type: ignore + self.on.ready.emit(event.relation, new_url) # type: ignore + + def _handle_relation_broken(self, event: RelationEvent) -> None: + self._stored.current_url = None # type: ignore + self.on.revoked.emit(relation=event.relation, app=event.relation.app) # type: ignore + + def _handle_upgrade_or_leader(self, event: EventBase) -> None: + """On upgrade/leadership change: ensure we publish the data we have.""" + self._publish_auto_data() + + def is_ready(self) -> bool: + """The Requirer is ready if the Provider has sent valid data.""" + try: + return bool(self._get_url_from_relation_data()) + except DataValidationError as e: + log.debug("Requirer not ready; validation error encountered: %s" % str(e)) + return False + + def _publish_auto_data(self) -> None: + if self._auto_data: + host, ip, port = self._auto_data + self.provide_ingress_requirements(host=host, ip=ip, port=port) + + def 
provide_ingress_requirements( + self, + *, + scheme: Optional[str] = None, + host: Optional[str] = None, + ip: Optional[str] = None, + port: int, + ) -> None: + """Publishes the data that Traefik needs to provide ingress. + + Args: + scheme: Scheme to be used; if unspecified, use the one used by __init__. + host: Hostname to be used by the ingress provider to address the + requirer unit; if unspecified, FQDN will be used instead + ip: Alternative addressing method other than host to be used by the ingress provider. + if unspecified, binding address from juju network API will be used. + port: the port of the service (required) + """ + for relation in self.relations: + self._provide_ingress_requirements(scheme, host, ip, port, relation) + + def _provide_ingress_requirements( + self, + scheme: Optional[str], + host: Optional[str], + ip: Optional[str], + port: int, + relation: Relation, + ) -> None: + if self.unit.is_leader(): + self._publish_app_data(scheme, port, relation) + + self._publish_unit_data(host, ip, relation) + + def _publish_unit_data( + self, + host: Optional[str], + ip: Optional[str], + relation: Relation, + ) -> None: + if not host: + host = socket.getfqdn() + + if ip is None: + network_binding = self.charm.model.get_binding(relation) + if ( + network_binding is not None + and (bind_address := network_binding.network.bind_address) is not None + ): + ip = str(bind_address) + else: + log.error("failed to retrieve ip information from juju") + + unit_databag = relation.data[self.unit] + try: + IngressRequirerUnitData(host=host, ip=ip).dump(unit_databag) + except pydantic.ValidationError as e: + msg = "failed to validate unit data" + log.info(msg, exc_info=True) # log to INFO because this might be expected + raise DataValidationError(msg) from e + + def _publish_app_data( + self, + scheme: Optional[str], + port: int, + relation: Relation, + ) -> None: + # assumes leadership! 
+ app_databag = relation.data[self.app] + + if not scheme: + # If scheme was not provided, use the one given to the constructor. + scheme = self._get_scheme() + + try: + # Ignore pyright errors since pyright does not like aliases. + IngressRequirerAppData( # type: ignore + model=self.model.name, + name=self.app.name, + scheme=scheme, + port=port, + strip_prefix=self._strip_prefix, # type: ignore + redirect_https=self._redirect_https, # type: ignore + healthcheck_params=( + IngressHealthCheck(**self.healthcheck_params) + if self.healthcheck_params + else None + ), + ).dump(app_databag) + except pydantic.ValidationError as e: + msg = "failed to validate app data" + log.info(msg, exc_info=True) # log to INFO because this might be expected + raise DataValidationError(msg) from e + + @property + def relation(self) -> Optional[Relation]: + """The established Relation instance, or None.""" + return self.relations[0] if self.relations else None + + def _get_url_from_relation_data(self) -> Optional[str]: + """The full ingress URL to reach the charm application. + + Returns None if the URL isn't available yet. + """ + relation = self.relation + if not relation or not relation.app: + return None + + # fetch the provider's app databag + try: + databag = relation.data[relation.app] + except ModelError as e: + log.debug( + f"Error {e} attempting to read remote app data; " + f"probably we are in a relation_departed hook" + ) + return None + + if not databag: # not ready yet + return None + + ingress = cast(IngressProviderAppData, IngressProviderAppData.load(databag)).ingress + if ingress is None: + return None + + return str(ingress.url) + + @property + def url(self) -> Optional[str]: + """The full ingress URL to reach the charm application. + + Returns None if the URL isn't available yet. 
+ """ + data = ( + typing.cast(Optional[str], self._stored.current_url) # type: ignore + or self._get_url_from_relation_data() + ) + return data diff --git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py index 464a3fdb..64adf58a 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm-operator/src/charm.py @@ -5,8 +5,11 @@ """GARM charm entrypoint.""" import logging +import secrets import typing +import ops +import paas_charm.go import tomli_w logger = logging.getLogger(__name__) @@ -73,4 +76,97 @@ def render_garm_toml( return tomli_w.dumps(config) -# GarmCharm class and ops.main() entrypoint are added in the next implementation step. +def _generate_garm_secrets() -> dict[str, str]: + """Generate a fresh set of GARM secrets. + + Returns: + Dict with keys ``jwt-secret`` and ``db-passphrase``, each a 64-char hex string. + """ + return { + "jwt-secret": secrets.token_hex(32), + "db-passphrase": secrets.token_hex(32), + } + + +class GarmCharm(paas_charm.go.Charm): + """GARM charm — manages the GARM service via Pebble.""" + + def __init__(self, *args: typing.Any) -> None: + """Initialize the charm. + + Args: + args: Passed through to CharmBase. + """ + super().__init__(*args) + self.framework.observe(self.on.install, self._on_install) + + def _on_install(self, _: ops.InstallEvent) -> None: + """Ensure secrets exist on first install.""" + self._ensure_secrets() + + def restart(self, rerun_migrations: bool = False) -> None: + """Write GARM config then restart the workload. + + Overrides the parent to inject the TOML config file and correct + Pebble command before each restart. + + Args: + rerun_migrations: Passed through to the parent restart. 
+ """ + if not self.is_ready(): + return + self._ensure_secrets() + super().restart(rerun_migrations=rerun_migrations) + container = self.unit.get_container(CONTAINER_NAME) + if not container.can_connect(): + return + self._push_garm_config(container) + container.add_layer( + "garm-command", + { + "services": { + PEBBLE_SERVICE_NAME: { + "override": "merge", + "command": f"{GARM_BINARY} -config {GARM_CONFIG_PATH}", + } + } + }, + combine=True, + ) + container.replan() + + def _ensure_secrets(self) -> None: + """Create the garm-secrets juju secret on first call (leader only).""" + if not self.unit.is_leader(): + return + try: + self.model.get_secret(label=GARM_SECRETS_LABEL) + except ops.SecretNotFoundError: + self.app.add_secret(_generate_garm_secrets(), label=GARM_SECRETS_LABEL) + + def _get_jwt_secret(self) -> str: + """Retrieve the JWT secret from the juju secret store. + + Returns: + The jwt-secret string. + """ + secret = self.model.get_secret(label=GARM_SECRETS_LABEL) + return secret.get_content()["jwt-secret"] + + def _push_garm_config(self, container: ops.Container) -> None: + """Render and push the GARM TOML config into the Pebble container. + + Args: + container: The Pebble container to push the config into. 
+ """ + toml_content = render_garm_toml( + listen_address=str(self.config.get("garm-listen-address", "0.0.0.0")), + listen_port=int(self.config.get("garm-listen-port", 9997)), + db_path=str(self.config.get("garm-db-path", "/srv/garm/data/garm.db")), + jwt_secret=self._get_jwt_secret(), + ) + container.push(GARM_CONFIG_PATH, toml_content, make_dirs=True) + + +if __name__ == "__main__": + ops.main(GarmCharm) diff --git a/charms/garm-operator/tests/unit/test_charm.py b/charms/garm-operator/tests/unit/test_charm.py index c9579ddf..708c65b7 100644 --- a/charms/garm-operator/tests/unit/test_charm.py +++ b/charms/garm-operator/tests/unit/test_charm.py @@ -103,3 +103,37 @@ def test_render_garm_toml_provider_section(): provider["external"]["provider_executable"] == "/usr/local/bin/garm-provider-openstack" ) + + +# --------------------------------------------------------------------------- +# Secret management tests (no Harness needed — test the helper directly) +# --------------------------------------------------------------------------- + +def test_generate_garm_secrets_returns_hex_strings(): + """ + arrange: Nothing. + act: Call _generate_garm_secrets(). + assert: Returns a dict with jwt-secret and db-passphrase as 64-char hex strings. + """ + from charm import _generate_garm_secrets + + result = _generate_garm_secrets() + assert set(result.keys()) == {"jwt-secret", "db-passphrase"} + assert len(result["jwt-secret"]) == 64 + assert len(result["db-passphrase"]) == 64 + assert all(c in "0123456789abcdef" for c in result["jwt-secret"]) + assert all(c in "0123456789abcdef" for c in result["db-passphrase"]) + + +def test_generate_garm_secrets_produces_unique_values(): + """ + arrange: Nothing. + act: Call _generate_garm_secrets() twice. + assert: The two calls return different secrets. 
+ """ + from charm import _generate_garm_secrets + + first = _generate_garm_secrets() + second = _generate_garm_secrets() + assert first["jwt-secret"] != second["jwt-secret"] + assert first["db-passphrase"] != second["db-passphrase"] From 54c4ed76c51bf18c5eed3bbf92828dac9aab441f Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 14:04:15 +0800 Subject: [PATCH 08/29] fix: handle missing secret on non-leader units in restart() - Catch SecretNotFoundError in restart() and set WaitingStatus - Remove redundant can_connect() check (is_ready() already verifies) - Add TODO comment documenting the double-replan issue (ISD-5718) - Add clarifying comment for db-passphrase placeholder - Add pythonpath to pyproject.toml for IDE/direct pytest support Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/pyproject.toml | 1 + charms/garm-operator/src/charm.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/charms/garm-operator/pyproject.toml b/charms/garm-operator/pyproject.toml index 660ac999..9252457a 100644 --- a/charms/garm-operator/pyproject.toml +++ b/charms/garm-operator/pyproject.toml @@ -8,6 +8,7 @@ show_missing = true [tool.pytest.ini_options] minversion = "6.0" log_cli_level = "INFO" +pythonpath = ["src", "lib"] # Linting tools configuration [tool.ruff] diff --git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py index 64adf58a..883fa8b5 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm-operator/src/charm.py @@ -84,6 +84,7 @@ def _generate_garm_secrets() -> dict[str, str]: """ return { "jwt-secret": secrets.token_hex(32), + # Reserved for future SQLite encryption support (scaffold placeholder) "db-passphrase": secrets.token_hex(32), } @@ -116,11 +117,18 @@ def restart(self, rerun_migrations: bool = False) -> None: if not self.is_ready(): return self._ensure_secrets() + # TODO: Eliminate double-replan (ISD-5718). 
paas_charm calls replan() + # internally in super().restart(), which starts GARM with the default + # command momentarily before this method overrides it. Acceptable for + # the scaffold; resolve by contributing an upstream hook in a future story. super().restart(rerun_migrations=rerun_migrations) container = self.unit.get_container(CONTAINER_NAME) - if not container.can_connect(): + try: + self._push_garm_config(container) + except ops.SecretNotFoundError: + logger.warning("garm-secrets not yet available; deferring config push to next event") + self.unit.status = ops.WaitingStatus("Waiting for leader to initialise garm-secrets") return - self._push_garm_config(container) container.add_layer( "garm-command", { From a11a11499933876639d6489437845513146c276f Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Fri, 22 May 2026 14:05:17 +0800 Subject: [PATCH 09/29] feat: add garm-operator Terraform module Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/terraform/main.tf | 18 ++++++++ charms/garm-operator/terraform/outputs.tf | 7 +++ charms/garm-operator/terraform/variables.tf | 49 +++++++++++++++++++++ charms/garm-operator/terraform/versions.tf | 12 +++++ 4 files changed, 86 insertions(+) create mode 100644 charms/garm-operator/terraform/main.tf create mode 100644 charms/garm-operator/terraform/outputs.tf create mode 100644 charms/garm-operator/terraform/variables.tf create mode 100644 charms/garm-operator/terraform/versions.tf diff --git a/charms/garm-operator/terraform/main.tf b/charms/garm-operator/terraform/main.tf new file mode 100644 index 00000000..4c828dd4 --- /dev/null +++ b/charms/garm-operator/terraform/main.tf @@ -0,0 +1,18 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. 
+ +resource "juju_application" "github_runner_garm" { + name = var.app_name + model_uuid = var.model_uuid + + charm { + name = "github-runner-garm" + channel = var.channel + revision = var.revision + base = var.base + } + + config = var.config + constraints = var.constraints + units = var.units +} diff --git a/charms/garm-operator/terraform/outputs.tf b/charms/garm-operator/terraform/outputs.tf new file mode 100644 index 00000000..f7258556 --- /dev/null +++ b/charms/garm-operator/terraform/outputs.tf @@ -0,0 +1,7 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +output "app_name" { + description = "Name of the deployed application." + value = juju_application.github_runner_garm.name +} diff --git a/charms/garm-operator/terraform/variables.tf b/charms/garm-operator/terraform/variables.tf new file mode 100644 index 00000000..70666cd3 --- /dev/null +++ b/charms/garm-operator/terraform/variables.tf @@ -0,0 +1,49 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +variable "app_name" { + description = "Name of the application in the Juju model." + type = string + default = "github-runner-garm" +} + +variable "base" { + description = "The operating system on which to deploy." + type = string + default = "ubuntu@24.04" +} + +variable "channel" { + description = "The channel to use when deploying the charm." + type = string + default = "latest/edge" +} + +variable "config" { + description = "Application config. See charmhub.io/github-runner-garm/configurations." + type = map(string) + default = {} +} + +variable "constraints" { + description = "Juju constraints to apply for this application." + type = string + default = "" +} + +variable "model_uuid" { + description = "UUID of the Juju model to deploy into." + type = string +} + +variable "revision" { + description = "Revision number of the charm." + type = number + default = null +} + +variable "units" { + description = "Number of units to deploy." 
+ type = number + default = 1 +} diff --git a/charms/garm-operator/terraform/versions.tf b/charms/garm-operator/terraform/versions.tf new file mode 100644 index 00000000..2a99ba1b --- /dev/null +++ b/charms/garm-operator/terraform/versions.tf @@ -0,0 +1,12 @@ +# Copyright 2025 Canonical Ltd. +# See LICENSE file for licensing details. + +terraform { + required_version = ">= 1.6.6" + required_providers { + juju = { + source = "juju/juju" + version = ">= 1.0.0" + } + } +} From 872fc56a243c31ee41c7c6867d2dbf9fa80acae6 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 11:14:42 +0800 Subject: [PATCH 10/29] fix: update copyright year to 2026 Also vendors the charms.postgresql_k8s.v0.postgresql charm library as a new file (charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py, 1035 lines). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- build-garm-rock.sh | 2 +- charms/garm-operator/charmcraft.yaml | 2 +- .../charms/postgresql_k8s/v0/postgresql.py | 1035 +++++++++++++++++ charms/garm-operator/src/charm.py | 2 +- charms/garm-operator/terraform/main.tf | 2 +- charms/garm-operator/terraform/outputs.tf | 2 +- charms/garm-operator/terraform/variables.tf | 2 +- charms/garm-operator/terraform/versions.tf | 2 +- charms/garm-operator/tests/unit/test_charm.py | 2 +- garm-rockcraft.yaml | 2 +- 10 files changed, 1044 insertions(+), 9 deletions(-) create mode 100644 charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py diff --git a/build-garm-rock.sh b/build-garm-rock.sh index a435dc24..8217a70c 100755 --- a/build-garm-rock.sh +++ b/build-garm-rock.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. # diff --git a/charms/garm-operator/charmcraft.yaml b/charms/garm-operator/charmcraft.yaml index 7cbd1385..55095552 100644 --- a/charms/garm-operator/charmcraft.yaml +++ b/charms/garm-operator/charmcraft.yaml @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details.
name: github-runner-garm diff --git a/charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py b/charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py new file mode 100644 index 00000000..45b7bb64 --- /dev/null +++ b/charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py @@ -0,0 +1,1035 @@ +# Copyright 2022 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""PostgreSQL helper class. + +The `postgresql` module provides methods for interacting with the PostgreSQL instance. + +Any charm using this library should import the `psycopg2` or `psycopg2-binary` dependency. 
+""" + +import logging +from collections import OrderedDict +from typing import Dict, List, Optional, Set, Tuple + +import psycopg2 +from ops.model import Relation +from psycopg2.sql import SQL, Composed, Identifier, Literal + +# The unique Charmhub library identifier, never change it +LIBID = "24ee217a54e840a598ff21a079c3e678" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 58 + +# Groups to distinguish HBA access +ACCESS_GROUP_IDENTITY = "identity_access" +ACCESS_GROUP_INTERNAL = "internal_access" +ACCESS_GROUP_RELATION = "relation_access" + +# List of access groups to filter role assignments by +ACCESS_GROUPS = [ + ACCESS_GROUP_IDENTITY, + ACCESS_GROUP_INTERNAL, + ACCESS_GROUP_RELATION, +] + +# Groups to distinguish database permissions +PERMISSIONS_GROUP_ADMIN = "admin" + +INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE = "invalid role(s) for extra user roles" + +REQUIRED_PLUGINS = { + "address_standardizer": ["postgis"], + "address_standardizer_data_us": ["postgis"], + "jsonb_plperl": ["plperl"], + "postgis_raster": ["postgis"], + "postgis_tiger_geocoder": ["postgis", "fuzzystrmatch"], + "postgis_topology": ["postgis"], +} +DEPENDENCY_PLUGINS = set() +for dependencies in REQUIRED_PLUGINS.values(): + DEPENDENCY_PLUGINS |= set(dependencies) + +logger = logging.getLogger(__name__) + + +class PostgreSQLAssignGroupError(Exception): + """Exception raised when assigning to a group fails.""" + + +class PostgreSQLCreateDatabaseError(Exception): + """Exception raised when creating a database fails.""" + + +class PostgreSQLCreateGroupError(Exception): + """Exception raised when creating a group fails.""" + + +class PostgreSQLCreateUserError(Exception): + """Exception raised when creating a user fails.""" + + def __init__(self, message: Optional[str] = None): + super().__init__(message) + self.message = 
message + + +class PostgreSQLDatabasesSetupError(Exception): + """Exception raised when the databases setup fails.""" + + +class PostgreSQLDeleteUserError(Exception): + """Exception raised when deleting a user fails.""" + + +class PostgreSQLEnableDisableExtensionError(Exception): + """Exception raised when enabling/disabling an extension fails.""" + + +class PostgreSQLGetLastArchivedWALError(Exception): + """Exception raised when retrieving last archived WAL fails.""" + + +class PostgreSQLGetCurrentTimelineError(Exception): + """Exception raised when retrieving current timeline id for the PostgreSQL unit fails.""" + + +class PostgreSQLGetPostgreSQLVersionError(Exception): + """Exception raised when retrieving PostgreSQL version fails.""" + + +class PostgreSQLListAccessibleDatabasesForUserError(Exception): + """Exception raised when retrieving the accessible databases for a user fails.""" + + +class PostgreSQLListGroupsError(Exception): + """Exception raised when retrieving PostgreSQL groups list fails.""" + + +class PostgreSQLListUsersError(Exception): + """Exception raised when retrieving PostgreSQL users list fails.""" + + +class PostgreSQLUpdateUserPasswordError(Exception): + """Exception raised when updating a user password fails.""" + + +class PostgreSQL: + """Class to encapsulate all operations related to interacting with PostgreSQL instance.""" + + def __init__( + self, + primary_host: str, + current_host: str, + user: str, + password: str, + database: str, + system_users: Optional[List[str]] = None, + ): + self.primary_host = primary_host + self.current_host = current_host + self.user = user + self.password = password + self.database = database + self.system_users = system_users if system_users else [] + + def _configure_pgaudit(self, enable: bool) -> None: + connection = None + try: + connection = self._connect_to_database() + connection.autocommit = True + with connection.cursor() as cursor: + if enable: + cursor.execute("ALTER SYSTEM SET pgaudit.log = 
'ROLE,DDL,MISC,MISC_SET';") + cursor.execute("ALTER SYSTEM SET pgaudit.log_client TO off;") + cursor.execute("ALTER SYSTEM SET pgaudit.log_parameter TO off;") + else: + cursor.execute("ALTER SYSTEM RESET pgaudit.log;") + cursor.execute("ALTER SYSTEM RESET pgaudit.log_client;") + cursor.execute("ALTER SYSTEM RESET pgaudit.log_parameter;") + cursor.execute("SELECT pg_reload_conf();") + finally: + if connection is not None: + connection.close() + + def _connect_to_database( + self, database: Optional[str] = None, database_host: Optional[str] = None + ) -> psycopg2.extensions.connection: + """Creates a connection to the database. + + Args: + database: database to connect to (defaults to the database + provided when the object for this class was created). + database_host: host to connect to instead of the primary host. + + Returns: + psycopg2 connection object. + """ + host = database_host if database_host is not None else self.primary_host + connection = psycopg2.connect( + f"dbname='{database if database else self.database}' user='{self.user}' host='{host}'" + f"password='{self.password}' connect_timeout=1" + ) + connection.autocommit = True + return connection + + def create_access_groups(self) -> None: + """Create access groups to distinguish HBA authentication methods.""" + connection = None + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + for group in ACCESS_GROUPS: + cursor.execute( + SQL("SELECT TRUE FROM pg_roles WHERE rolname={};").format(Literal(group)) + ) + if cursor.fetchone() is not None: + continue + cursor.execute( + SQL("CREATE ROLE {} NOLOGIN;").format( + Identifier(group), + ) + ) + except psycopg2.Error as e: + logger.error(f"Failed to create access groups: {e}") + raise PostgreSQLCreateGroupError() from e + finally: + if connection is not None: + connection.close() + + def create_database( + self, + database: str, + user: str, + plugins: Optional[List[str]] = None, + client_relations: 
Optional[List[Relation]] = None, + ) -> None: + """Creates a new database and grant privileges to a user on it. + + Args: + database: database to be created. + user: user that will have access to the database. + plugins: extensions to enable in the new database. + client_relations: current established client relations. + """ + plugins = plugins if plugins else [] + client_relations = client_relations if client_relations else [] + try: + connection = self._connect_to_database() + cursor = connection.cursor() + cursor.execute( + SQL("SELECT datname FROM pg_database WHERE datname={};").format(Literal(database)) + ) + if cursor.fetchone() is None: + cursor.execute(SQL("CREATE DATABASE {};").format(Identifier(database))) + cursor.execute( + SQL("REVOKE ALL PRIVILEGES ON DATABASE {} FROM PUBLIC;").format( + Identifier(database) + ) + ) + for user_to_grant_access in [user, PERMISSIONS_GROUP_ADMIN, *self.system_users]: + cursor.execute( + SQL("GRANT ALL PRIVILEGES ON DATABASE {} TO {};").format( + Identifier(database), Identifier(user_to_grant_access) + ) + ) + relations_accessing_this_database = 0 + for relation in client_relations: + for data in relation.data.values(): + if data.get("database") == database: + relations_accessing_this_database += 1 + with self._connect_to_database(database=database) as conn, conn.cursor() as curs: + curs.execute( + "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT LIKE 'pg_%' and schema_name <> 'information_schema';" + ) + schemas = [row[0] for row in curs.fetchall()] + statements = self._generate_database_privileges_statements( + relations_accessing_this_database, schemas, user + ) + for statement in statements: + curs.execute(statement) + except psycopg2.Error as e: + logger.error(f"Failed to create database: {e}") + raise PostgreSQLCreateDatabaseError() from e + + # Enable preset extensions + if plugins: + self.enable_disable_extensions(dict.fromkeys(plugins, True), database) + + def create_user( + self, + 
user: str, + password: Optional[str] = None, + admin: bool = False, + extra_user_roles: Optional[List[str]] = None, + ) -> None: + """Creates a database user. + + Args: + user: user to be created. + password: password to be assigned to the user. + admin: whether the user should have additional admin privileges. + extra_user_roles: additional privileges and/or roles to be assigned to the user. + """ + try: + # Separate roles and privileges from the provided extra user roles. + admin_role = False + roles = privileges = None + if extra_user_roles: + admin_role = PERMISSIONS_GROUP_ADMIN in extra_user_roles + valid_privileges, valid_roles = self.list_valid_privileges_and_roles() + roles = [ + role + for role in extra_user_roles + if role in valid_roles and role != PERMISSIONS_GROUP_ADMIN + ] + privileges = { + extra_user_role + for extra_user_role in extra_user_roles + if extra_user_role not in roles and extra_user_role != PERMISSIONS_GROUP_ADMIN + } + invalid_privileges = [ + privilege for privilege in privileges if privilege not in valid_privileges + ] + if "relation_access" in invalid_privileges: + logger.warning("Extra user role relation_access not available. Skipping role.") + invalid_privileges.remove("relation_access") + privileges.remove("relation_access") + if len(invalid_privileges) > 0: + logger.error(f"Invalid extra user roles: {', '.join(privileges)}") + raise PostgreSQLCreateUserError(INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE) + + with self._connect_to_database() as connection, connection.cursor() as cursor: + # Create or update the user. 
+ cursor.execute( + SQL("SELECT TRUE FROM pg_roles WHERE rolname={};").format(Literal(user)) + ) + if cursor.fetchone() is not None: + user_definition = "ALTER ROLE {}" + else: + user_definition = "CREATE ROLE {}" + user_definition += f"WITH {'NOLOGIN' if user == 'admin' else 'LOGIN'}{' SUPERUSER' if admin else ''} ENCRYPTED PASSWORD '{password}'{'IN ROLE admin CREATEDB' if admin_role else ''}" + if privileges: + user_definition += f" {' '.join(privileges)}" + cursor.execute(SQL("BEGIN;")) + cursor.execute(SQL("SET LOCAL log_statement = 'none';")) + cursor.execute(SQL(f"{user_definition};").format(Identifier(user))) + cursor.execute(SQL("COMMIT;")) + + # Add extra user roles to the new user. + if roles: + for role in roles: + cursor.execute( + SQL("GRANT {} TO {};").format(Identifier(role), Identifier(user)) + ) + except psycopg2.Error as e: + logger.error(f"Failed to create user: {e}") + raise PostgreSQLCreateUserError() from e + + def delete_user(self, user: str) -> None: + """Deletes a database user. + + Args: + user: user to be deleted. + """ + # First of all, check whether the user exists. Otherwise, do nothing. + users = self.list_users() + if user not in users: + return + + # List all databases. + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute("SELECT datname FROM pg_database WHERE datistemplate = false;") + databases = [row[0] for row in cursor.fetchall()] + + # Existing objects need to be reassigned in each database + # before the user can be deleted. + for database in databases: + with self._connect_to_database( + database + ) as connection, connection.cursor() as cursor: + cursor.execute( + SQL("REASSIGN OWNED BY {} TO {};").format( + Identifier(user), Identifier(self.user) + ) + ) + cursor.execute(SQL("DROP OWNED BY {};").format(Identifier(user))) + + # Delete the user. 
+ with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute(SQL("DROP ROLE {};").format(Identifier(user))) + except psycopg2.Error as e: + logger.error(f"Failed to delete user: {e}") + raise PostgreSQLDeleteUserError() from e + + def grant_internal_access_group_memberships(self) -> None: + """Grant membership to the internal access-group to existing internal users.""" + connection = None + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + for user in self.system_users: + cursor.execute( + SQL("GRANT {} TO {};").format( + Identifier(ACCESS_GROUP_INTERNAL), + Identifier(user), + ) + ) + except psycopg2.Error as e: + logger.error(f"Failed to grant internal access group memberships: {e}") + raise PostgreSQLAssignGroupError() from e + finally: + if connection is not None: + connection.close() + + def grant_relation_access_group_memberships(self) -> None: + """Grant membership to the relation access-group to existing relation users.""" + rel_users = self.list_users_from_relation() + if not rel_users: + return + + connection = None + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + rel_groups = SQL(",").join(Identifier(group) for group in [ACCESS_GROUP_RELATION]) + rel_users = SQL(",").join(Identifier(user) for user in rel_users) + + cursor.execute( + SQL("GRANT {groups} TO {users};").format( + groups=rel_groups, + users=rel_users, + ) + ) + except psycopg2.Error as e: + logger.error(f"Failed to grant relation access group memberships: {e}") + raise PostgreSQLAssignGroupError() from e + finally: + if connection is not None: + connection.close() + + def enable_disable_extensions( + self, extensions: Dict[str, bool], database: Optional[str] = None + ) -> None: + """Enables or disables a PostgreSQL extension. + + Args: + extensions: the name of the extensions. + database: optional database where to enable/disable the extension. 
+ + Raises: + PostgreSQLEnableDisableExtensionError if the operation fails. + """ + connection = None + try: + if database is not None: + databases = [database] + else: + # Retrieve all the databases. + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute("SELECT datname FROM pg_database WHERE NOT datistemplate;") + databases = {database[0] for database in cursor.fetchall()} + + ordered_extensions = OrderedDict() + for plugin in DEPENDENCY_PLUGINS: + ordered_extensions[plugin] = extensions.get(plugin, False) + for extension, enable in extensions.items(): + ordered_extensions[extension] = enable + + self._configure_pgaudit(False) + + # Enable/disabled the extension in each database. + for database in databases: + with self._connect_to_database( + database=database + ) as connection, connection.cursor() as cursor: + for extension, enable in ordered_extensions.items(): + cursor.execute( + f"CREATE EXTENSION IF NOT EXISTS {extension};" + if enable + else f"DROP EXTENSION IF EXISTS {extension};" + ) + self._configure_pgaudit(ordered_extensions.get("pgaudit", False)) + except psycopg2.errors.UniqueViolation: + pass + except psycopg2.errors.DependentObjectsStillExist: + raise + except psycopg2.Error as e: + raise PostgreSQLEnableDisableExtensionError() from e + finally: + if connection is not None: + connection.close() + + def _generate_database_privileges_statements( + self, relations_accessing_this_database: int, schemas: List[str], user: str + ) -> List[Composed]: + """Generates a list of databases privileges statements.""" + statements = [] + if relations_accessing_this_database == 1: + statements.append( + SQL( + """DO $$ +DECLARE r RECORD; +BEGIN + FOR r IN (SELECT statement FROM (SELECT 1 AS index,'ALTER TABLE '|| schemaname || '."' || tablename ||'" OWNER TO {};' AS statement +FROM pg_tables WHERE NOT schemaname IN ('pg_catalog', 'information_schema') +UNION SELECT 2 AS index,'ALTER SEQUENCE '|| sequence_schema || '."' || 
sequence_name ||'" OWNER TO {};' AS statement +FROM information_schema.sequences WHERE NOT sequence_schema IN ('pg_catalog', 'information_schema') +UNION SELECT 3 AS index,'ALTER FUNCTION '|| nsp.nspname || '."' || p.proname ||'"('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};' AS statement +FROM pg_proc p JOIN pg_namespace nsp ON p.pronamespace = nsp.oid WHERE NOT nsp.nspname IN ('pg_catalog', 'information_schema') AND p.prokind = 'f' +UNION SELECT 4 AS index,'ALTER PROCEDURE '|| nsp.nspname || '."' || p.proname ||'"('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};' AS statement +FROM pg_proc p JOIN pg_namespace nsp ON p.pronamespace = nsp.oid WHERE NOT nsp.nspname IN ('pg_catalog', 'information_schema') AND p.prokind = 'p' +UNION SELECT 5 AS index,'ALTER AGGREGATE '|| nsp.nspname || '."' || p.proname ||'"('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};' AS statement +FROM pg_proc p JOIN pg_namespace nsp ON p.pronamespace = nsp.oid WHERE NOT nsp.nspname IN ('pg_catalog', 'information_schema') AND p.prokind = 'a' +UNION SELECT 6 AS index,'ALTER VIEW '|| schemaname || '."' || viewname ||'" OWNER TO {};' AS statement +FROM pg_catalog.pg_views WHERE NOT schemaname IN ('pg_catalog', 'information_schema')) AS statements ORDER BY index) LOOP + EXECUTE format(r.statement); + END LOOP; +END; $$;""" + ).format( + Identifier(user), + Identifier(user), + Identifier(user), + Identifier(user), + Identifier(user), + Identifier(user), + ) + ) + statements.append( + SQL( + "UPDATE pg_catalog.pg_largeobject_metadata\n" + "SET lomowner = (SELECT oid FROM pg_roles WHERE rolname = {})\n" + "WHERE lomowner = (SELECT oid FROM pg_roles WHERE rolname = {});" + ).format(Literal(user), Literal(self.user)) + ) + for schema in schemas: + statements.append( + SQL("ALTER SCHEMA {} OWNER TO {};").format( + Identifier(schema), Identifier(user) + ) + ) + else: + for schema in schemas: + schema = Identifier(schema) + statements.extend([ + SQL("GRANT ALL 
PRIVILEGES ON ALL TABLES IN SCHEMA {} TO {};").format( + schema, Identifier(user) + ), + SQL("GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA {} TO {};").format( + schema, Identifier(user) + ), + SQL("GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA {} TO {};").format( + schema, Identifier(user) + ), + SQL("GRANT USAGE ON SCHEMA {} TO {};").format(schema, Identifier(user)), + SQL("GRANT CREATE ON SCHEMA {} TO {};").format(schema, Identifier(user)), + ]) + return statements + + def get_last_archived_wal(self) -> str: + """Get the name of the last archived wal for the current PostgreSQL cluster.""" + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute("SELECT last_archived_wal FROM pg_stat_archiver;") + return cursor.fetchone()[0] + except psycopg2.Error as e: + logger.error(f"Failed to get PostgreSQL last archived WAL: {e}") + raise PostgreSQLGetLastArchivedWALError() from e + + def get_current_timeline(self) -> str: + """Get the timeline id for the current PostgreSQL unit.""" + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute("SELECT timeline_id FROM pg_control_checkpoint();") + return cursor.fetchone()[0] + except psycopg2.Error as e: + logger.error(f"Failed to get PostgreSQL current timeline id: {e}") + raise PostgreSQLGetCurrentTimelineError() from e + + def get_postgresql_text_search_configs(self) -> Set[str]: + """Returns the PostgreSQL available text search configs. + + Returns: + Set of PostgreSQL text search configs. + """ + with self._connect_to_database( + database_host=self.current_host + ) as connection, connection.cursor() as cursor: + cursor.execute("SELECT CONCAT('pg_catalog.', cfgname) FROM pg_ts_config;") + text_search_configs = cursor.fetchall() + return {text_search_config[0] for text_search_config in text_search_configs} + + def get_postgresql_timezones(self) -> Set[str]: + """Returns the PostgreSQL available timezones. 
+ + Returns: + Set of PostgreSQL timezones. + """ + with self._connect_to_database( + database_host=self.current_host + ) as connection, connection.cursor() as cursor: + cursor.execute("SELECT name FROM pg_timezone_names;") + timezones = cursor.fetchall() + return {timezone[0] for timezone in timezones} + + def get_postgresql_default_table_access_methods(self) -> Set[str]: + """Returns the PostgreSQL available table access methods. + + Returns: + Set of PostgreSQL table access methods. + """ + with self._connect_to_database( + database_host=self.current_host + ) as connection, connection.cursor() as cursor: + cursor.execute("SELECT amname FROM pg_am WHERE amtype = 't';") + access_methods = cursor.fetchall() + return {access_method[0] for access_method in access_methods} + + def get_postgresql_version(self, current_host=True) -> str: + """Returns the PostgreSQL version. + + Returns: + PostgreSQL version number. + """ + host = self.current_host if current_host else None + try: + with self._connect_to_database( + database_host=host + ) as connection, connection.cursor() as cursor: + cursor.execute("SELECT version();") + # Split to get only the version number. + return cursor.fetchone()[0].split(" ")[1] + except psycopg2.Error as e: + logger.error(f"Failed to get PostgreSQL version: {e}") + raise PostgreSQLGetPostgreSQLVersionError() from e + + def is_tls_enabled(self, check_current_host: bool = False) -> bool: + """Returns whether TLS is enabled. + + Args: + check_current_host: whether to check the current host + instead of the primary host. + + Returns: + whether TLS is enabled. + """ + try: + with self._connect_to_database( + database_host=self.current_host if check_current_host else None + ) as connection, connection.cursor() as cursor: + cursor.execute("SHOW ssl;") + return "on" in cursor.fetchone()[0] + except psycopg2.Error: + # Connection errors happen when PostgreSQL has not started yet. 
+ return False + + def list_access_groups(self, current_host=False) -> Set[str]: + """Returns the list of PostgreSQL database access groups. + + Args: + current_host: whether to check the current host + instead of the primary host. + + Returns: + List of PostgreSQL database access groups. + """ + connection = None + host = self.current_host if current_host else None + try: + with self._connect_to_database( + database_host=host + ) as connection, connection.cursor() as cursor: + cursor.execute( + "SELECT groname FROM pg_catalog.pg_group WHERE groname LIKE '%_access';" + ) + access_groups = cursor.fetchall() + return {group[0] for group in access_groups} + except psycopg2.Error as e: + logger.error(f"Failed to list PostgreSQL database access groups: {e}") + raise PostgreSQLListGroupsError() from e + finally: + if connection is not None: + connection.close() + + def list_accessible_databases_for_user(self, user: str, current_host=False) -> Set[str]: + """Returns the list of accessible databases for a specific user. + + Args: + user: the user to check. + current_host: whether to check the current host + instead of the primary host. + + Returns: + List of accessible database (the ones where + the user has the CONNECT privilege). 
+ """ + connection = None + host = self.current_host if current_host else None + try: + with self._connect_to_database( + database_host=host + ) as connection, connection.cursor() as cursor: + cursor.execute( + SQL( + "SELECT TRUE FROM pg_catalog.pg_user WHERE usename = {} AND usesuper;" + ).format(Literal(user)) + ) + if cursor.fetchone() is not None: + return {"all"} + cursor.execute( + SQL( + "SELECT datname FROM pg_catalog.pg_database WHERE has_database_privilege({}, datname, 'CONNECT') AND NOT datistemplate;" + ).format(Literal(user)) + ) + databases = cursor.fetchall() + return {database[0] for database in databases} + except psycopg2.Error as e: + logger.error(f"Failed to list accessible databases for user {user}: {e}") + raise PostgreSQLListAccessibleDatabasesForUserError() from e + finally: + if connection is not None: + connection.close() + + def list_users(self, group: Optional[str] = None, current_host=False) -> Set[str]: + """Returns the list of PostgreSQL database users. + + Args: + group: optional group to filter the users. + current_host: whether to check the current host + instead of the primary host. + + Returns: + List of PostgreSQL database users. 
+ """ + connection = None + host = self.current_host if current_host else None + try: + with self._connect_to_database( + database_host=host + ) as connection, connection.cursor() as cursor: + if group: + query = SQL( + "SELECT usename FROM (SELECT UNNEST(grolist) AS user_id FROM pg_catalog.pg_group WHERE groname = {}) AS g JOIN pg_catalog.pg_user AS u ON g.user_id = u.usesysid;" + ).format(Literal(group)) + else: + query = "SELECT usename FROM pg_catalog.pg_user;" + cursor.execute(query) + usernames = cursor.fetchall() + return {username[0] for username in usernames} + except psycopg2.Error as e: + logger.error(f"Failed to list PostgreSQL database users: {e}") + raise PostgreSQLListUsersError() from e + finally: + if connection is not None: + connection.close() + + def list_users_from_relation(self, current_host=False) -> Set[str]: + """Returns the list of PostgreSQL database users that were created by a relation. + + Args: + current_host: whether to check the current host + instead of the primary host. + + Returns: + List of PostgreSQL database users. + """ + connection = None + host = self.current_host if current_host else None + try: + with self._connect_to_database( + database_host=host + ) as connection, connection.cursor() as cursor: + cursor.execute( + "SELECT usename " + "FROM pg_catalog.pg_user " + "WHERE usename LIKE 'relation_id_%' OR usename LIKE 'relation-%' " + "OR usename LIKE 'pgbouncer_auth_relation_%' OR usename LIKE '%_user_%_%';" + ) + usernames = cursor.fetchall() + return {username[0] for username in usernames} + except psycopg2.Error as e: + logger.error(f"Failed to list PostgreSQL database users: {e}") + raise PostgreSQLListUsersError() from e + finally: + if connection is not None: + connection.close() + + def list_valid_privileges_and_roles(self) -> Tuple[Set[str], Set[str]]: + """Returns two sets with valid privileges and roles. + + Returns: + Tuple containing two sets: the first with valid privileges + and the second with valid roles. 
+ """ + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute("SELECT rolname FROM pg_roles;") + return { + "createdb", + "createrole", + "superuser", + }, {role[0] for role in cursor.fetchall() if role[0]} + + def set_up_database(self) -> None: + """Set up postgres database with the right permissions.""" + connection = None + cursor = None + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute("SELECT TRUE FROM pg_roles WHERE rolname='admin';") + if cursor.fetchone() is None: + # Allow access to the postgres database only to the system users. + cursor.execute("REVOKE ALL PRIVILEGES ON DATABASE postgres FROM PUBLIC;") + cursor.execute("REVOKE CREATE ON SCHEMA public FROM PUBLIC;") + for user in self.system_users: + cursor.execute( + SQL("GRANT ALL PRIVILEGES ON DATABASE postgres TO {};").format( + Identifier(user) + ) + ) + self.create_user( + PERMISSIONS_GROUP_ADMIN, + extra_user_roles=["pg_read_all_data", "pg_write_all_data"], + ) + cursor.execute("GRANT CONNECT ON DATABASE postgres TO admin;") + except psycopg2.Error as e: + logger.error(f"Failed to set up databases: {e}") + raise PostgreSQLDatabasesSetupError() from e + finally: + if cursor is not None: + cursor.close() + if connection is not None: + connection.close() + + def update_user_password( + self, username: str, password: str, database_host: Optional[str] = None + ) -> None: + """Update a user password. + + Args: + username: the user to update the password. + password: the new password for the user. + database_host: the host to connect to. + + Raises: + PostgreSQLUpdateUserPasswordError if the password couldn't be changed. 
+ """ + connection = None + try: + with self._connect_to_database( + database_host=database_host + ) as connection, connection.cursor() as cursor: + cursor.execute(SQL("BEGIN;")) + cursor.execute(SQL("SET LOCAL log_statement = 'none';")) + cursor.execute( + SQL("ALTER USER {} WITH ENCRYPTED PASSWORD '" + password + "';").format( + Identifier(username) + ) + ) + cursor.execute(SQL("COMMIT;")) + except psycopg2.Error as e: + logger.error(f"Failed to update user password: {e}") + raise PostgreSQLUpdateUserPasswordError() from e + finally: + if connection is not None: + connection.close() + + def is_restart_pending(self) -> bool: + """Query pg_settings for pending restart.""" + connection = None + try: + with self._connect_to_database( + database_host=self.current_host + ) as connection, connection.cursor() as cursor: + cursor.execute("SELECT COUNT(*) FROM pg_settings WHERE pending_restart=True;") + return cursor.fetchone()[0] > 0 + except psycopg2.OperationalError: + logger.warning("Failed to connect to PostgreSQL.") + return False + except psycopg2.Error as e: + logger.error(f"Failed to check if restart is pending: {e}") + return False + finally: + if connection: + connection.close() + + @staticmethod + def build_postgresql_group_map(group_map: Optional[str]) -> List[Tuple]: + """Build the PostgreSQL authorization group-map. + + Args: + group_map: serialized group-map with the following format: + =, + =, + ... + + Returns: + List of LDAP group to PostgreSQL group tuples. 
+ """ + if group_map is None: + return [] + + group_mappings = group_map.split(",") + group_mappings = (mapping.strip() for mapping in group_mappings) + group_map_list = [] + + for mapping in group_mappings: + mapping_parts = mapping.split("=") + if len(mapping_parts) != 2: + raise ValueError("The group-map must contain value pairs split by commas") + + ldap_group = mapping_parts[0] + psql_group = mapping_parts[1] + + if psql_group in [*ACCESS_GROUPS, PERMISSIONS_GROUP_ADMIN]: + logger.warning(f"Tried to assign LDAP users to forbidden group: {psql_group}") + continue + + group_map_list.append((ldap_group, psql_group)) + + return group_map_list + + @staticmethod + def build_postgresql_parameters( + config_options: dict, available_memory: int, limit_memory: Optional[int] = None + ) -> Optional[dict]: + """Builds the PostgreSQL parameters. + + Args: + config_options: charm config options containing profile and PostgreSQL parameters. + available_memory: available memory to use in calculation in bytes. + limit_memory: (optional) limit memory to use in calculation in bytes. + + Returns: + Dictionary with the PostgreSQL parameters. + """ + if limit_memory: + available_memory = min(available_memory, limit_memory) + profile = config_options["profile"] + logger.debug(f"Building PostgreSQL parameters for {profile=} and {available_memory=}") + parameters = {} + for config, value in config_options.items(): + # Filter config option not related to PostgreSQL parameters. + if not config.startswith(( + "connection", + "cpu", + "durability", + "instance", + "logging", + "memory", + "optimizer", + "request", + "response", + "session", + "storage", + "vacuum", + )): + continue + parameter = "_".join(config.split("_")[1:]) + if parameter in ["date_style", "time_zone"]: + parameter = "".join(x.capitalize() for x in parameter.split("_")) + elif parameter.startswith("pg_stat_statements"): + parameter = "pg_stat_statements." 
+ parameter.removeprefix("pg_stat_statements_") + elif parameter == "maximum_lag_on_failover": + continue + parameters[parameter] = value + shared_buffers_max_value_in_mb = int(available_memory * 0.4 / 10**6) + shared_buffers_max_value = int(shared_buffers_max_value_in_mb * 10**3 / 8) + if parameters.get("shared_buffers", 0) > shared_buffers_max_value: + raise Exception( + f"Shared buffers config option should be at most 40% of the available memory, which is {shared_buffers_max_value_in_mb}MB" + ) + if profile == "production": + if "shared_buffers" in parameters: + # Convert to bytes to use in the calculation. + shared_buffers = parameters["shared_buffers"] * 8 * 10**3 + else: + # Use 25% of the available memory for shared_buffers. + # and the remaining as cache memory. + shared_buffers = int(available_memory * 0.25) + parameters["shared_buffers"] = f"{int(shared_buffers * 128 / 10**6)}" + effective_cache_size = int(available_memory - shared_buffers) + parameters.update({ + "effective_cache_size": f"{int(effective_cache_size / 10**6) * 128}" + }) + return parameters + + def validate_date_style(self, date_style: str) -> bool: + """Validate a date style against PostgreSQL. + + Returns: + Whether the date style is valid. + """ + try: + with self._connect_to_database( + database_host=self.current_host + ) as connection, connection.cursor() as cursor: + cursor.execute( + SQL( + "SET DateStyle to {};", + ).format(Identifier(date_style)) + ) + return True + except psycopg2.Error: + return False + + def validate_group_map(self, group_map: Optional[str]) -> bool: + """Validate the PostgreSQL authorization group-map. + + Args: + group_map: serialized group-map with the following format: + =, + =, + ... + + Returns: + Whether the group-map is valid. 
+ """ + if group_map is None: + return True + + try: + group_map = self.build_postgresql_group_map(group_map) + except ValueError: + return False + + for _, psql_group in group_map: + with self._connect_to_database() as connection, connection.cursor() as cursor: + query = SQL("SELECT TRUE FROM pg_roles WHERE rolname={};") + query = query.format(Literal(psql_group)) + cursor.execute(query) + + if cursor.fetchone() is None: + return False + + return True + + def is_user_in_hba(self, username: str) -> bool: + """Check if user was added in pg_hba.""" + connection = None + try: + with self._connect_to_database() as connection, connection.cursor() as cursor: + cursor.execute( + SQL( + "SELECT COUNT(*) FROM pg_hba_file_rules WHERE {} = ANY(user_name);" + ).format(Literal(username)) + ) + return cursor.fetchone()[0] > 0 + except psycopg2.Error as e: + logger.debug(f"Failed to check pg_hba: {e}") + return False + finally: + if connection: + connection.close() diff --git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py index 883fa8b5..0b1fabbe 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm-operator/src/charm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. """GARM charm entrypoint.""" diff --git a/charms/garm-operator/terraform/main.tf b/charms/garm-operator/terraform/main.tf index 4c828dd4..76417fb0 100644 --- a/charms/garm-operator/terraform/main.tf +++ b/charms/garm-operator/terraform/main.tf @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. 
resource "juju_application" "github_runner_garm" { diff --git a/charms/garm-operator/terraform/outputs.tf b/charms/garm-operator/terraform/outputs.tf index f7258556..c70eb441 100644 --- a/charms/garm-operator/terraform/outputs.tf +++ b/charms/garm-operator/terraform/outputs.tf @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. output "app_name" { diff --git a/charms/garm-operator/terraform/variables.tf b/charms/garm-operator/terraform/variables.tf index 70666cd3..cc29e0c9 100644 --- a/charms/garm-operator/terraform/variables.tf +++ b/charms/garm-operator/terraform/variables.tf @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. variable "app_name" { diff --git a/charms/garm-operator/terraform/versions.tf b/charms/garm-operator/terraform/versions.tf index 2a99ba1b..efb63996 100644 --- a/charms/garm-operator/terraform/versions.tf +++ b/charms/garm-operator/terraform/versions.tf @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. terraform { diff --git a/charms/garm-operator/tests/unit/test_charm.py b/charms/garm-operator/tests/unit/test_charm.py index 708c65b7..6bcbbcf2 100644 --- a/charms/garm-operator/tests/unit/test_charm.py +++ b/charms/garm-operator/tests/unit/test_charm.py @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. """Unit tests for GarmCharm.""" diff --git a/garm-rockcraft.yaml b/garm-rockcraft.yaml index 88fd2353..545ca07a 100644 --- a/garm-rockcraft.yaml +++ b/garm-rockcraft.yaml @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. 
name: garm From a490782fbb80bff4ef71326e3409a14118c04515 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 11:21:48 +0800 Subject: [PATCH 11/29] fix: align GARM config and DB paths with upstream defaults Use /etc/garm/config.toml and /etc/garm/garm.db to match the hardcoded DefaultConfigFilePath constant, Dockerfile ENTRYPOINT, and systemd unit in the upstream cloudbase/garm repository. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/charmcraft.yaml | 2 +- charms/garm-operator/src/charm.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charms/garm-operator/charmcraft.yaml b/charms/garm-operator/charmcraft.yaml index 55095552..36595241 100644 --- a/charms/garm-operator/charmcraft.yaml +++ b/charms/garm-operator/charmcraft.yaml @@ -31,5 +31,5 @@ config: description: Port GARM API server listens on. garm-db-path: type: string - default: "/srv/garm/data/garm.db" + default: "/etc/garm/garm.db" description: Path to the SQLite database file. 
diff --git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py index 0b1fabbe..516e6138 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm-operator/src/charm.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -GARM_CONFIG_PATH: typing.Final[str] = "/srv/garm/config/config.toml" +GARM_CONFIG_PATH: typing.Final[str] = "/etc/garm/config.toml" GARM_SECRETS_LABEL: typing.Final[str] = "garm-secrets" CONTAINER_NAME: typing.Final[str] = "app" PEBBLE_SERVICE_NAME: typing.Final[str] = "app" @@ -170,7 +170,7 @@ def _push_garm_config(self, container: ops.Container) -> None: toml_content = render_garm_toml( listen_address=str(self.config.get("garm-listen-address", "0.0.0.0")), listen_port=int(self.config.get("garm-listen-port", 9997)), - db_path=str(self.config.get("garm-db-path", "/srv/garm/data/garm.db")), + db_path=str(self.config.get("garm-db-path", "/etc/garm/garm.db")), jwt_secret=self._get_jwt_secret(), ) container.push(GARM_CONFIG_PATH, toml_content, make_dirs=True) From 9f1592d786156d098d9832fe0a989107d0cb5a10 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 11:51:43 +0800 Subject: [PATCH 12/29] fix: move --only-binary=pluggy from requirements.txt to tox unit deps pluggy is a pytest plugin system dependency with no runtime role. The constraint belongs in the test environment, not in the charm's production requirements. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/requirements.txt | 1 - charms/garm-operator/tox.toml | 2 +- charms/planner-operator/requirements.txt | 1 - charms/planner-operator/tox.toml | 2 +- charms/webhook-gateway-operator/requirements.txt | 1 - charms/webhook-gateway-operator/tox.toml | 2 +- 6 files changed, 3 insertions(+), 6 deletions(-) diff --git a/charms/garm-operator/requirements.txt b/charms/garm-operator/requirements.txt index efbc780f..a50a8449 100644 --- a/charms/garm-operator/requirements.txt +++ b/charms/garm-operator/requirements.txt @@ -1,4 +1,3 @@ ---only-binary=pluggy ops==3.7.0 paas-charm==1.11.2 tomli-w==1.2.0 diff --git a/charms/garm-operator/tox.toml b/charms/garm-operator/tox.toml index 8bd49c05..3349ae98 100644 --- a/charms/garm-operator/tox.toml +++ b/charms/garm-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["pytest", "coverage[toml]", "tomli; python_version < '3.11'", "-r requirements.txt"] +deps = ["--only-binary=pluggy", "pytest", "coverage[toml]", "tomli; python_version < '3.11'", "-r requirements.txt"] commands = [ [ "coverage", diff --git a/charms/planner-operator/requirements.txt b/charms/planner-operator/requirements.txt index 0eba1f53..d97cebee 100644 --- a/charms/planner-operator/requirements.txt +++ b/charms/planner-operator/requirements.txt @@ -1,4 +1,3 @@ ---only-binary=pluggy ops==3.7.0 paas-charm==1.11.2 requests==2.34.2 diff --git a/charms/planner-operator/tox.toml b/charms/planner-operator/tox.toml index 598d1fdc..691082a5 100644 --- a/charms/planner-operator/tox.toml +++ b/charms/planner-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["pytest", "requests-mock", "coverage[toml]", "-r requirements.txt"] +deps = ["--only-binary=pluggy", "pytest", "requests-mock", "coverage[toml]", "-r requirements.txt"] commands = [ [ "coverage", diff --git 
a/charms/webhook-gateway-operator/requirements.txt b/charms/webhook-gateway-operator/requirements.txt index 48a0a0ff..02251a66 100644 --- a/charms/webhook-gateway-operator/requirements.txt +++ b/charms/webhook-gateway-operator/requirements.txt @@ -1,3 +1,2 @@ ---only-binary=pluggy ops==3.7.0 paas-charm==1.11.2 diff --git a/charms/webhook-gateway-operator/tox.toml b/charms/webhook-gateway-operator/tox.toml index cf46531a..6f12eb17 100644 --- a/charms/webhook-gateway-operator/tox.toml +++ b/charms/webhook-gateway-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["pytest", "pytest-custom-exit-code", "coverage[toml]", "-r requirements.txt"] +deps = ["--only-binary=pluggy", "pytest", "pytest-custom-exit-code", "coverage[toml]", "-r requirements.txt"] commands = [ [ "coverage", From e898977ca1c7d8ba1eff0e64b3344016eaed5bb1 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 12:01:25 +0800 Subject: [PATCH 13/29] feat: add GARM charm integration tests Tests cover: - ROCK image contains GARM and OpenStack provider binaries - Charm deploys and reaches active status - Pebble service runs garm with -config /etc/garm/config.toml - Leader generates garm-secrets Juju secret with jwt-secret and db-passphrase Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/tests/conftest.py | 6 ++ charms/tests/integration/conftest.py | 49 ++++++++++++ charms/tests/integration/test_garm.py | 105 ++++++++++++++++++++++++++ 3 files changed, 160 insertions(+) create mode 100644 charms/tests/integration/test_garm.py diff --git a/charms/tests/conftest.py b/charms/tests/conftest.py index bbc3ab5e..f74b9d89 100644 --- a/charms/tests/conftest.py +++ b/charms/tests/conftest.py @@ -8,6 +8,7 @@ CHARM_FILE_PARAM = "--charm-file" PLANNER_IMAGE_PARAM = "--planner-image" WEBHOOK_GATEWAY_IMAGE_PARAM = "--webhook-gateway-image" +GARM_IMAGE_PARAM = "--garm-image" def pytest_addoption(parser: Parser) -> None: @@ 
-27,3 +28,8 @@ def pytest_addoption(parser: Parser) -> None: action="store", help="Webhook gateway app image to be deployed", ) + parser.addoption( + GARM_IMAGE_PARAM, + action="store", + help="GARM app image to be deployed", + ) diff --git a/charms/tests/integration/conftest.py b/charms/tests/integration/conftest.py index 2ee5fb38..aa959d16 100644 --- a/charms/tests/integration/conftest.py +++ b/charms/tests/integration/conftest.py @@ -12,6 +12,7 @@ import requests from tests.conftest import ( CHARM_FILE_PARAM, + GARM_IMAGE_PARAM, PLANNER_IMAGE_PARAM, WEBHOOK_GATEWAY_IMAGE_PARAM, ) @@ -306,3 +307,51 @@ def _on_planner_relation_joined(self, event): delay=10, ) return app_name + + +@pytest.fixture(name="garm_charm_file", scope="module") +def garm_charm_file_fixture(pytestconfig: pytest.Config) -> str | None: + """Return the path to the built GARM charm file.""" + charm = pytestconfig.getoption(CHARM_FILE_PARAM) + if not charm: + return None + if len(charm) > 1: + garm_charm = [file for file in charm if "garm" in file] + return garm_charm[0] + return charm[0] + + +@pytest.fixture(name="garm_app_image", scope="module") +def garm_app_image_fixture(pytestconfig: pytest.Config) -> str | None: + """Return the GARM OCI image reference for the app-image resource.""" + return pytestconfig.getoption(GARM_IMAGE_PARAM) + + +@pytest.fixture(scope="module", name="garm_app") +def deploy_garm_app_fixture( + juju: jubilant.Juju, + garm_charm_file: str, + garm_app_image: str, +) -> str: + """Deploy the GARM application and wait for it to become active. + + - Deploys the GARM charm with the provided ROCK image as the app-image resource. + - Waits for the application to reach active status. GARM requires no external + integrations at the scaffold stage: secrets are auto-generated by the leader + unit and all config options have sensible defaults. + + Returns the application name once active. 
+ """ + app_name = "github-runner-garm" + + juju.deploy( + charm=garm_charm_file, + app=app_name, + resources={"app-image": garm_app_image}, + ) + juju.wait( + lambda status: jubilant.all_active(status, app_name), + timeout=10 * 60, + delay=10, + ) + return app_name diff --git a/charms/tests/integration/test_garm.py b/charms/tests/integration/test_garm.py new file mode 100644 index 00000000..ab30cb6b --- /dev/null +++ b/charms/tests/integration/test_garm.py @@ -0,0 +1,105 @@ +# Copyright 2026 Canonical Ltd. +# See LICENSE file for licensing details. + +"""Integration tests for the GARM charm.""" + +import json + +import jubilant +import pytest + +GARM_BINARY = "/usr/local/bin/garm" +GARM_PROVIDER_BINARY = "/usr/local/bin/garm-provider-openstack" +GARM_CONFIG_PATH = "/etc/garm/config.toml" +GARM_SECRETS_LABEL = "garm-secrets" + + +def test_garm_rock_contains_binaries( + juju: jubilant.Juju, + garm_app: str, +): + """ + arrange: The GARM charm is deployed with the built ROCK image. + act: Execute a file-existence check for GARM binaries inside the workload container. + assert: Both the GARM server binary and the OpenStack provider binary are present. + """ + unit = f"{garm_app}/0" + result = juju.exec(unit, ["ls", GARM_BINARY, GARM_PROVIDER_BINARY]) + + assert result.return_code == 0, ( + f"Expected GARM binaries at {GARM_BINARY} and {GARM_PROVIDER_BINARY}, " + f"got: {result.stderr}" + ) + + +def test_garm_charm_reaches_active( + juju: jubilant.Juju, + garm_app: str, +): + """ + arrange: The GARM charm is deployed with the built ROCK image and default config. + act: Observe the Juju application status. + assert: The application is in active status, confirming a successful install. 
+ """ + status = juju.status() + + assert jubilant.all_active(status, garm_app), ( + f"Expected {garm_app} to be active, got: " + f"{status.apps[garm_app].app_status.current}" + ) + + +def test_garm_pebble_service_command( + juju: jubilant.Juju, + garm_app: str, +): + """ + arrange: The GARM charm is deployed and active. + act: Read the Pebble plan from the workload container. + assert: The Pebble service runs the GARM binary with the canonical config flag. + """ + unit = f"{garm_app}/0" + result = juju.exec(unit, ["pebble", "plan"]) + + assert result.return_code == 0, f"pebble plan failed: {result.stderr}" + plan_output = result.stdout + assert GARM_BINARY in plan_output, ( + f"Expected {GARM_BINARY} in pebble plan, got: {plan_output}" + ) + assert f"-config {GARM_CONFIG_PATH}" in plan_output, ( + f"Expected '-config {GARM_CONFIG_PATH}' in pebble plan, got: {plan_output}" + ) + + +def test_garm_juju_secret_has_expected_keys( + juju: jubilant.Juju, + garm_app: str, +): + """ + arrange: The GARM charm is deployed and active (leader has initialised secrets). + act: List Juju secrets and show the garm-secrets secret content. + assert: The garm-secrets secret contains both jwt-secret and db-passphrase keys. 
+ """ + secrets_json = juju.cli("secrets", "--format=json") + secrets = json.loads(secrets_json) + + garm_secret_uri = None + for uri, info in secrets.items(): + if info.get("label") == GARM_SECRETS_LABEL: + garm_secret_uri = uri + break + + assert garm_secret_uri is not None, ( + f"Expected a Juju secret labelled '{GARM_SECRETS_LABEL}' to exist" + ) + + secret_json = juju.cli("show-secret", "--reveal", "--format=json", garm_secret_uri) + secret = json.loads(secret_json) + content = secret[garm_secret_uri]["content"]["Data"] + + assert "jwt-secret" in content, ( + f"Expected 'jwt-secret' key in {GARM_SECRETS_LABEL}, got keys: {list(content)}" + ) + assert "db-passphrase" in content, ( + f"Expected 'db-passphrase' key in {GARM_SECRETS_LABEL}, got keys: {list(content)}" + ) From 2a00058ca4e48cd692a3a5b7316a290a9672a729 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 12:08:51 +0800 Subject: [PATCH 14/29] ci: add GARM charm to lint/unit matrix, publish workflow, and integration tox env - charms_lint_and_unit.yaml: add charms/garm-operator to the charm matrix so tox lint/complexity/static/unit/coverage runs on every PR - publish_charms.yml: add garm to the publish matrix so the ROCK and charm are built and published to Charmhub on merge to main - tox.ini: add garm-integration env for targeted Juju charm integration test runs The charms_integration.yaml workflow already covers GARM integration tests via the charms-integration env (whole charms/tests/integration/ directory). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/charms_lint_and_unit.yaml | 1 + .github/workflows/publish_charms.yml | 1 + tox.ini | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) diff --git a/.github/workflows/charms_lint_and_unit.yaml b/.github/workflows/charms_lint_and_unit.yaml index 31bf04b6..1cba77fd 100644 --- a/.github/workflows/charms_lint_and_unit.yaml +++ b/.github/workflows/charms_lint_and_unit.yaml @@ -35,6 +35,7 @@ jobs: charm: - charms/planner-operator - charms/webhook-gateway-operator + - charms/garm-operator steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/publish_charms.yml b/.github/workflows/publish_charms.yml index c5a0b432..b41d7e8e 100644 --- a/.github/workflows/publish_charms.yml +++ b/.github/workflows/publish_charms.yml @@ -19,6 +19,7 @@ jobs: charm: - planner - webhook-gateway + - garm name: ${{ github.event_name == 'push' && '' || 'Test ' }}Publish Charm (${{ matrix.charm }}) runs-on: ubuntu-latest steps: diff --git a/tox.ini b/tox.ini index c8e7294b..35c56c0c 100644 --- a/tox.ini +++ b/tox.ini @@ -23,6 +23,22 @@ pass_env = +[testenv:garm-integration] +pass_env = + PYTEST_ADDOPTS +description = Run GARM charm integration tests +deps = + pytest + pytest-operator + -r {[vars]tests_path}/integration/requirements.txt +commands = + pytest -v \ + -s \ + --tb native \ + --log-cli-level=INFO \ + {[vars]tests_path}/integration/test_garm.py \ + {posargs} + [testenv:webhook-gateway-integration] pass_env = PYTEST_ADDOPTS From 02c909d0ad3b797b2bdeff35c4ee1c2d1909e3a3 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 12:10:01 +0800 Subject: [PATCH 15/29] ci: comment out garm from publish matrix until charm is functional Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/publish_charms.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish_charms.yml b/.github/workflows/publish_charms.yml 
index b41d7e8e..df2da23b 100644 --- a/.github/workflows/publish_charms.yml +++ b/.github/workflows/publish_charms.yml @@ -19,7 +19,7 @@ jobs: charm: - planner - webhook-gateway - - garm + # - garm # not yet functional; uncomment when ready to publish name: ${{ github.event_name == 'push' && '' || 'Test ' }}Publish Charm (${{ matrix.charm }}) runs-on: ubuntu-latest steps: From f983a25cc231cd6ae503e3dc4c35cae37092bb9d Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 12:17:54 +0800 Subject: [PATCH 16/29] fix: remove --only-binary=pluggy from tox unit deps tox-uv translates this flag to --only-binary {'pluggy'} which is invalid for uv. The constraint is also unnecessary since uv always prefers binary wheels by default. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/tox.toml | 2 +- charms/planner-operator/tox.toml | 2 +- charms/webhook-gateway-operator/tox.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charms/garm-operator/tox.toml b/charms/garm-operator/tox.toml index 3349ae98..8bd49c05 100644 --- a/charms/garm-operator/tox.toml +++ b/charms/garm-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["--only-binary=pluggy", "pytest", "coverage[toml]", "tomli; python_version < '3.11'", "-r requirements.txt"] +deps = ["pytest", "coverage[toml]", "tomli; python_version < '3.11'", "-r requirements.txt"] commands = [ [ "coverage", diff --git a/charms/planner-operator/tox.toml b/charms/planner-operator/tox.toml index 691082a5..598d1fdc 100644 --- a/charms/planner-operator/tox.toml +++ b/charms/planner-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["--only-binary=pluggy", "pytest", "requests-mock", "coverage[toml]", "-r requirements.txt"] +deps = ["pytest", "requests-mock", "coverage[toml]", "-r requirements.txt"] commands = [ [ "coverage", diff --git 
a/charms/webhook-gateway-operator/tox.toml b/charms/webhook-gateway-operator/tox.toml index 6f12eb17..cf46531a 100644 --- a/charms/webhook-gateway-operator/tox.toml +++ b/charms/webhook-gateway-operator/tox.toml @@ -26,7 +26,7 @@ commands = [["pyright"]] [env.unit] description = "Run unit tests" -deps = ["--only-binary=pluggy", "pytest", "pytest-custom-exit-code", "coverage[toml]", "-r requirements.txt"] +deps = ["pytest", "pytest-custom-exit-code", "coverage[toml]", "-r requirements.txt"] commands = [ [ "coverage", From ca08dce1c5e9e6d83d5d7ce72cf9a0a707d56e1e Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 12:19:47 +0800 Subject: [PATCH 17/29] chore: remove unused pytest import and fix blank line Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/tests/unit/test_charm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/charms/garm-operator/tests/unit/test_charm.py b/charms/garm-operator/tests/unit/test_charm.py index 6bcbbcf2..fcc556cb 100644 --- a/charms/garm-operator/tests/unit/test_charm.py +++ b/charms/garm-operator/tests/unit/test_charm.py @@ -8,8 +8,6 @@ except ImportError: import tomli as tomllib # type: ignore[no-redef] -import pytest - from charm import render_garm_toml @@ -109,6 +107,7 @@ def test_render_garm_toml_provider_section(): # Secret management tests (no Harness needed — test the helper directly) # --------------------------------------------------------------------------- + def test_generate_garm_secrets_returns_hex_strings(): """ arrange: Nothing. 
From 0a6cc2328418331c1ac5148b5e55e37440d4ad86 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 13:17:30 +0800 Subject: [PATCH 18/29] docs: add changelog entry for GARM charm scaffold Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/changelog.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 9cbcf21c..e588a665 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -8,6 +8,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Each revision is versioned by the date of the revision. +## 2026-05-25 + +- add GARM (GitHub Actions Runner Manager) 12-factor charm scaffold with ROCK image, Juju secret management, and TOML config rendering. + ## 2026-04-22 - add action to allow workflow authors to opt in to forwarding specific log files from self-hosted GitHub runners to Loki through the OpenTelemetry Collector snap. From 18df9e856cb9d22332515b8cca33932f44281554 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 13:38:01 +0800 Subject: [PATCH 19/29] fix: use go/1.26/stable and GOTOOLCHAIN=local for ROCK build GARM v0.2.1 requires go 1.26.2 and garm-provider-openstack v0.1.5 requires go 1.25.0. Using go/1.24/stable caused Go's toolchain manager to try to auto-download a newer version, which polluted the output of 'go version' and crashed rockcraft's go plugin validator. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- garm-rockcraft.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/garm-rockcraft.yaml b/garm-rockcraft.yaml index 545ca07a..42106538 100644 --- a/garm-rockcraft.yaml +++ b/garm-rockcraft.yaml @@ -19,7 +19,9 @@ parts: # renovate: datasource=github-releases depName=cloudbase/garm source-tag: v0.2.1 build-snaps: - - go/1.24/stable + - go/1.26/stable + build-environment: + - GOTOOLCHAIN: local override-build: | cd "$CRAFT_PART_SRC" go build -tags osusergo,netgo,sqlite_omit_load_extension -o bin/garm ./cmd/garm @@ -33,7 +35,9 @@ parts: # renovate: datasource=github-releases depName=cloudbase/garm-provider-openstack source-tag: v0.1.5 build-snaps: - - go/1.24/stable + - go/1.26/stable + build-environment: + - GOTOOLCHAIN: local override-build: | cd "$CRAFT_PART_SRC" go build -o bin/garm-provider-openstack . From 091d7e2717131c9e11fbfb507ae8c6c5776a281e Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 14:01:44 +0800 Subject: [PATCH 20/29] refactor: remove db-passphrase SQLite encryption placeholder The db-passphrase secret key was a placeholder for a future SQLite encryption story that has no timeline. Remove it to keep the secrets surface minimal and accurate. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm-operator/src/charm.py | 4 +--- charms/garm-operator/tests/unit/test_charm.py | 7 ++----- charms/tests/integration/test_garm.py | 5 +---- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/charms/garm-operator/src/charm.py b/charms/garm-operator/src/charm.py index 516e6138..694cdda9 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm-operator/src/charm.py @@ -80,12 +80,10 @@ def _generate_garm_secrets() -> dict[str, str]: """Generate a fresh set of GARM secrets. Returns: - Dict with keys ``jwt-secret`` and ``db-passphrase``, each a 64-char hex string. 
+ Dict with key ``jwt-secret`` as a 64-char hex string. """ return { "jwt-secret": secrets.token_hex(32), - # Reserved for future SQLite encryption support (scaffold placeholder) - "db-passphrase": secrets.token_hex(32), } diff --git a/charms/garm-operator/tests/unit/test_charm.py b/charms/garm-operator/tests/unit/test_charm.py index fcc556cb..89d7ca62 100644 --- a/charms/garm-operator/tests/unit/test_charm.py +++ b/charms/garm-operator/tests/unit/test_charm.py @@ -112,16 +112,14 @@ def test_generate_garm_secrets_returns_hex_strings(): """ arrange: Nothing. act: Call _generate_garm_secrets(). - assert: Returns a dict with jwt-secret and db-passphrase as 64-char hex strings. + assert: Returns a dict with jwt-secret as a 64-char hex string. """ from charm import _generate_garm_secrets result = _generate_garm_secrets() - assert set(result.keys()) == {"jwt-secret", "db-passphrase"} + assert set(result.keys()) == {"jwt-secret"} assert len(result["jwt-secret"]) == 64 - assert len(result["db-passphrase"]) == 64 assert all(c in "0123456789abcdef" for c in result["jwt-secret"]) - assert all(c in "0123456789abcdef" for c in result["db-passphrase"]) def test_generate_garm_secrets_produces_unique_values(): @@ -135,4 +133,3 @@ def test_generate_garm_secrets_produces_unique_values(): first = _generate_garm_secrets() second = _generate_garm_secrets() assert first["jwt-secret"] != second["jwt-secret"] - assert first["db-passphrase"] != second["db-passphrase"] diff --git a/charms/tests/integration/test_garm.py b/charms/tests/integration/test_garm.py index ab30cb6b..399f9a88 100644 --- a/charms/tests/integration/test_garm.py +++ b/charms/tests/integration/test_garm.py @@ -78,7 +78,7 @@ def test_garm_juju_secret_has_expected_keys( """ arrange: The GARM charm is deployed and active (leader has initialised secrets). act: List Juju secrets and show the garm-secrets secret content. - assert: The garm-secrets secret contains both jwt-secret and db-passphrase keys. 
+ assert: The garm-secrets secret contains the jwt-secret key. """ secrets_json = juju.cli("secrets", "--format=json") secrets = json.loads(secrets_json) @@ -100,6 +100,3 @@ def test_garm_juju_secret_has_expected_keys( assert "jwt-secret" in content, ( f"Expected 'jwt-secret' key in {GARM_SECRETS_LABEL}, got keys: {list(content)}" ) - assert "db-passphrase" in content, ( - f"Expected 'db-passphrase' key in {GARM_SECRETS_LABEL}, got keys: {list(content)}" - ) From 6b0aaa066c1102f44a26423b25df5db24ef22600 Mon Sep 17 00:00:00 2001 From: Yanks Yoon <37652070+yanksyoon@users.noreply.github.com> Date: Mon, 25 May 2026 14:16:38 +0800 Subject: [PATCH 21/29] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- charms/tests/integration/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/charms/tests/integration/conftest.py b/charms/tests/integration/conftest.py index aa959d16..d2ffa1d5 100644 --- a/charms/tests/integration/conftest.py +++ b/charms/tests/integration/conftest.py @@ -317,6 +317,8 @@ def garm_charm_file_fixture(pytestconfig: pytest.Config) -> str | None: return None if len(charm) > 1: garm_charm = [file for file in charm if "garm" in file] + if not garm_charm: + raise pytest.UsageError("No GARM charm file found in --charm-file; expected a path containing 'garm'.") return garm_charm[0] return charm[0] From da290da6ef542d3cf64b05e1016456e3a817ec9d Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 14:28:05 +0800 Subject: [PATCH 22/29] chore: remove unused postgresql_k8s charm lib Not required by paas_charm and not used by the charm itself. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../charms/postgresql_k8s/v0/postgresql.py | 1035 ----------------- 1 file changed, 1035 deletions(-) delete mode 100644 charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py diff --git a/charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py b/charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py deleted file mode 100644 index 45b7bb64..00000000 --- a/charms/garm-operator/lib/charms/postgresql_k8s/v0/postgresql.py +++ /dev/null @@ -1,1035 +0,0 @@ -# Copyright 2022 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""PostgreSQL helper class. - -The `postgresql` module provides methods for interacting with the PostgreSQL instance. - -Any charm using this library should import the `psycopg2` or `psycopg2-binary` dependency. 
-""" - -import logging -from collections import OrderedDict -from typing import Dict, List, Optional, Set, Tuple - -import psycopg2 -from ops.model import Relation -from psycopg2.sql import SQL, Composed, Identifier, Literal - -# The unique Charmhub library identifier, never change it -LIBID = "24ee217a54e840a598ff21a079c3e678" - -# Increment this major API version when introducing breaking changes -LIBAPI = 0 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 58 - -# Groups to distinguish HBA access -ACCESS_GROUP_IDENTITY = "identity_access" -ACCESS_GROUP_INTERNAL = "internal_access" -ACCESS_GROUP_RELATION = "relation_access" - -# List of access groups to filter role assignments by -ACCESS_GROUPS = [ - ACCESS_GROUP_IDENTITY, - ACCESS_GROUP_INTERNAL, - ACCESS_GROUP_RELATION, -] - -# Groups to distinguish database permissions -PERMISSIONS_GROUP_ADMIN = "admin" - -INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE = "invalid role(s) for extra user roles" - -REQUIRED_PLUGINS = { - "address_standardizer": ["postgis"], - "address_standardizer_data_us": ["postgis"], - "jsonb_plperl": ["plperl"], - "postgis_raster": ["postgis"], - "postgis_tiger_geocoder": ["postgis", "fuzzystrmatch"], - "postgis_topology": ["postgis"], -} -DEPENDENCY_PLUGINS = set() -for dependencies in REQUIRED_PLUGINS.values(): - DEPENDENCY_PLUGINS |= set(dependencies) - -logger = logging.getLogger(__name__) - - -class PostgreSQLAssignGroupError(Exception): - """Exception raised when assigning to a group fails.""" - - -class PostgreSQLCreateDatabaseError(Exception): - """Exception raised when creating a database fails.""" - - -class PostgreSQLCreateGroupError(Exception): - """Exception raised when creating a group fails.""" - - -class PostgreSQLCreateUserError(Exception): - """Exception raised when creating a user fails.""" - - def __init__(self, message: Optional[str] = None): - super().__init__(message) - self.message = 
message - - -class PostgreSQLDatabasesSetupError(Exception): - """Exception raised when the databases setup fails.""" - - -class PostgreSQLDeleteUserError(Exception): - """Exception raised when deleting a user fails.""" - - -class PostgreSQLEnableDisableExtensionError(Exception): - """Exception raised when enabling/disabling an extension fails.""" - - -class PostgreSQLGetLastArchivedWALError(Exception): - """Exception raised when retrieving last archived WAL fails.""" - - -class PostgreSQLGetCurrentTimelineError(Exception): - """Exception raised when retrieving current timeline id for the PostgreSQL unit fails.""" - - -class PostgreSQLGetPostgreSQLVersionError(Exception): - """Exception raised when retrieving PostgreSQL version fails.""" - - -class PostgreSQLListAccessibleDatabasesForUserError(Exception): - """Exception raised when retrieving the accessible databases for a user fails.""" - - -class PostgreSQLListGroupsError(Exception): - """Exception raised when retrieving PostgreSQL groups list fails.""" - - -class PostgreSQLListUsersError(Exception): - """Exception raised when retrieving PostgreSQL users list fails.""" - - -class PostgreSQLUpdateUserPasswordError(Exception): - """Exception raised when updating a user password fails.""" - - -class PostgreSQL: - """Class to encapsulate all operations related to interacting with PostgreSQL instance.""" - - def __init__( - self, - primary_host: str, - current_host: str, - user: str, - password: str, - database: str, - system_users: Optional[List[str]] = None, - ): - self.primary_host = primary_host - self.current_host = current_host - self.user = user - self.password = password - self.database = database - self.system_users = system_users if system_users else [] - - def _configure_pgaudit(self, enable: bool) -> None: - connection = None - try: - connection = self._connect_to_database() - connection.autocommit = True - with connection.cursor() as cursor: - if enable: - cursor.execute("ALTER SYSTEM SET pgaudit.log = 
'ROLE,DDL,MISC,MISC_SET';") - cursor.execute("ALTER SYSTEM SET pgaudit.log_client TO off;") - cursor.execute("ALTER SYSTEM SET pgaudit.log_parameter TO off;") - else: - cursor.execute("ALTER SYSTEM RESET pgaudit.log;") - cursor.execute("ALTER SYSTEM RESET pgaudit.log_client;") - cursor.execute("ALTER SYSTEM RESET pgaudit.log_parameter;") - cursor.execute("SELECT pg_reload_conf();") - finally: - if connection is not None: - connection.close() - - def _connect_to_database( - self, database: Optional[str] = None, database_host: Optional[str] = None - ) -> psycopg2.extensions.connection: - """Creates a connection to the database. - - Args: - database: database to connect to (defaults to the database - provided when the object for this class was created). - database_host: host to connect to instead of the primary host. - - Returns: - psycopg2 connection object. - """ - host = database_host if database_host is not None else self.primary_host - connection = psycopg2.connect( - f"dbname='{database if database else self.database}' user='{self.user}' host='{host}'" - f"password='{self.password}' connect_timeout=1" - ) - connection.autocommit = True - return connection - - def create_access_groups(self) -> None: - """Create access groups to distinguish HBA authentication methods.""" - connection = None - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - for group in ACCESS_GROUPS: - cursor.execute( - SQL("SELECT TRUE FROM pg_roles WHERE rolname={};").format(Literal(group)) - ) - if cursor.fetchone() is not None: - continue - cursor.execute( - SQL("CREATE ROLE {} NOLOGIN;").format( - Identifier(group), - ) - ) - except psycopg2.Error as e: - logger.error(f"Failed to create access groups: {e}") - raise PostgreSQLCreateGroupError() from e - finally: - if connection is not None: - connection.close() - - def create_database( - self, - database: str, - user: str, - plugins: Optional[List[str]] = None, - client_relations: 
Optional[List[Relation]] = None, - ) -> None: - """Creates a new database and grant privileges to a user on it. - - Args: - database: database to be created. - user: user that will have access to the database. - plugins: extensions to enable in the new database. - client_relations: current established client relations. - """ - plugins = plugins if plugins else [] - client_relations = client_relations if client_relations else [] - try: - connection = self._connect_to_database() - cursor = connection.cursor() - cursor.execute( - SQL("SELECT datname FROM pg_database WHERE datname={};").format(Literal(database)) - ) - if cursor.fetchone() is None: - cursor.execute(SQL("CREATE DATABASE {};").format(Identifier(database))) - cursor.execute( - SQL("REVOKE ALL PRIVILEGES ON DATABASE {} FROM PUBLIC;").format( - Identifier(database) - ) - ) - for user_to_grant_access in [user, PERMISSIONS_GROUP_ADMIN, *self.system_users]: - cursor.execute( - SQL("GRANT ALL PRIVILEGES ON DATABASE {} TO {};").format( - Identifier(database), Identifier(user_to_grant_access) - ) - ) - relations_accessing_this_database = 0 - for relation in client_relations: - for data in relation.data.values(): - if data.get("database") == database: - relations_accessing_this_database += 1 - with self._connect_to_database(database=database) as conn, conn.cursor() as curs: - curs.execute( - "SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT LIKE 'pg_%' and schema_name <> 'information_schema';" - ) - schemas = [row[0] for row in curs.fetchall()] - statements = self._generate_database_privileges_statements( - relations_accessing_this_database, schemas, user - ) - for statement in statements: - curs.execute(statement) - except psycopg2.Error as e: - logger.error(f"Failed to create database: {e}") - raise PostgreSQLCreateDatabaseError() from e - - # Enable preset extensions - if plugins: - self.enable_disable_extensions(dict.fromkeys(plugins, True), database) - - def create_user( - self, - 
user: str, - password: Optional[str] = None, - admin: bool = False, - extra_user_roles: Optional[List[str]] = None, - ) -> None: - """Creates a database user. - - Args: - user: user to be created. - password: password to be assigned to the user. - admin: whether the user should have additional admin privileges. - extra_user_roles: additional privileges and/or roles to be assigned to the user. - """ - try: - # Separate roles and privileges from the provided extra user roles. - admin_role = False - roles = privileges = None - if extra_user_roles: - admin_role = PERMISSIONS_GROUP_ADMIN in extra_user_roles - valid_privileges, valid_roles = self.list_valid_privileges_and_roles() - roles = [ - role - for role in extra_user_roles - if role in valid_roles and role != PERMISSIONS_GROUP_ADMIN - ] - privileges = { - extra_user_role - for extra_user_role in extra_user_roles - if extra_user_role not in roles and extra_user_role != PERMISSIONS_GROUP_ADMIN - } - invalid_privileges = [ - privilege for privilege in privileges if privilege not in valid_privileges - ] - if "relation_access" in invalid_privileges: - logger.warning("Extra user role relation_access not available. Skipping role.") - invalid_privileges.remove("relation_access") - privileges.remove("relation_access") - if len(invalid_privileges) > 0: - logger.error(f"Invalid extra user roles: {', '.join(privileges)}") - raise PostgreSQLCreateUserError(INVALID_EXTRA_USER_ROLE_BLOCKING_MESSAGE) - - with self._connect_to_database() as connection, connection.cursor() as cursor: - # Create or update the user. 
- cursor.execute( - SQL("SELECT TRUE FROM pg_roles WHERE rolname={};").format(Literal(user)) - ) - if cursor.fetchone() is not None: - user_definition = "ALTER ROLE {}" - else: - user_definition = "CREATE ROLE {}" - user_definition += f"WITH {'NOLOGIN' if user == 'admin' else 'LOGIN'}{' SUPERUSER' if admin else ''} ENCRYPTED PASSWORD '{password}'{'IN ROLE admin CREATEDB' if admin_role else ''}" - if privileges: - user_definition += f" {' '.join(privileges)}" - cursor.execute(SQL("BEGIN;")) - cursor.execute(SQL("SET LOCAL log_statement = 'none';")) - cursor.execute(SQL(f"{user_definition};").format(Identifier(user))) - cursor.execute(SQL("COMMIT;")) - - # Add extra user roles to the new user. - if roles: - for role in roles: - cursor.execute( - SQL("GRANT {} TO {};").format(Identifier(role), Identifier(user)) - ) - except psycopg2.Error as e: - logger.error(f"Failed to create user: {e}") - raise PostgreSQLCreateUserError() from e - - def delete_user(self, user: str) -> None: - """Deletes a database user. - - Args: - user: user to be deleted. - """ - # First of all, check whether the user exists. Otherwise, do nothing. - users = self.list_users() - if user not in users: - return - - # List all databases. - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute("SELECT datname FROM pg_database WHERE datistemplate = false;") - databases = [row[0] for row in cursor.fetchall()] - - # Existing objects need to be reassigned in each database - # before the user can be deleted. - for database in databases: - with self._connect_to_database( - database - ) as connection, connection.cursor() as cursor: - cursor.execute( - SQL("REASSIGN OWNED BY {} TO {};").format( - Identifier(user), Identifier(self.user) - ) - ) - cursor.execute(SQL("DROP OWNED BY {};").format(Identifier(user))) - - # Delete the user. 
- with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute(SQL("DROP ROLE {};").format(Identifier(user))) - except psycopg2.Error as e: - logger.error(f"Failed to delete user: {e}") - raise PostgreSQLDeleteUserError() from e - - def grant_internal_access_group_memberships(self) -> None: - """Grant membership to the internal access-group to existing internal users.""" - connection = None - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - for user in self.system_users: - cursor.execute( - SQL("GRANT {} TO {};").format( - Identifier(ACCESS_GROUP_INTERNAL), - Identifier(user), - ) - ) - except psycopg2.Error as e: - logger.error(f"Failed to grant internal access group memberships: {e}") - raise PostgreSQLAssignGroupError() from e - finally: - if connection is not None: - connection.close() - - def grant_relation_access_group_memberships(self) -> None: - """Grant membership to the relation access-group to existing relation users.""" - rel_users = self.list_users_from_relation() - if not rel_users: - return - - connection = None - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - rel_groups = SQL(",").join(Identifier(group) for group in [ACCESS_GROUP_RELATION]) - rel_users = SQL(",").join(Identifier(user) for user in rel_users) - - cursor.execute( - SQL("GRANT {groups} TO {users};").format( - groups=rel_groups, - users=rel_users, - ) - ) - except psycopg2.Error as e: - logger.error(f"Failed to grant relation access group memberships: {e}") - raise PostgreSQLAssignGroupError() from e - finally: - if connection is not None: - connection.close() - - def enable_disable_extensions( - self, extensions: Dict[str, bool], database: Optional[str] = None - ) -> None: - """Enables or disables a PostgreSQL extension. - - Args: - extensions: the name of the extensions. - database: optional database where to enable/disable the extension. 
- - Raises: - PostgreSQLEnableDisableExtensionError if the operation fails. - """ - connection = None - try: - if database is not None: - databases = [database] - else: - # Retrieve all the databases. - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute("SELECT datname FROM pg_database WHERE NOT datistemplate;") - databases = {database[0] for database in cursor.fetchall()} - - ordered_extensions = OrderedDict() - for plugin in DEPENDENCY_PLUGINS: - ordered_extensions[plugin] = extensions.get(plugin, False) - for extension, enable in extensions.items(): - ordered_extensions[extension] = enable - - self._configure_pgaudit(False) - - # Enable/disabled the extension in each database. - for database in databases: - with self._connect_to_database( - database=database - ) as connection, connection.cursor() as cursor: - for extension, enable in ordered_extensions.items(): - cursor.execute( - f"CREATE EXTENSION IF NOT EXISTS {extension};" - if enable - else f"DROP EXTENSION IF EXISTS {extension};" - ) - self._configure_pgaudit(ordered_extensions.get("pgaudit", False)) - except psycopg2.errors.UniqueViolation: - pass - except psycopg2.errors.DependentObjectsStillExist: - raise - except psycopg2.Error as e: - raise PostgreSQLEnableDisableExtensionError() from e - finally: - if connection is not None: - connection.close() - - def _generate_database_privileges_statements( - self, relations_accessing_this_database: int, schemas: List[str], user: str - ) -> List[Composed]: - """Generates a list of databases privileges statements.""" - statements = [] - if relations_accessing_this_database == 1: - statements.append( - SQL( - """DO $$ -DECLARE r RECORD; -BEGIN - FOR r IN (SELECT statement FROM (SELECT 1 AS index,'ALTER TABLE '|| schemaname || '."' || tablename ||'" OWNER TO {};' AS statement -FROM pg_tables WHERE NOT schemaname IN ('pg_catalog', 'information_schema') -UNION SELECT 2 AS index,'ALTER SEQUENCE '|| sequence_schema || '."' || 
sequence_name ||'" OWNER TO {};' AS statement -FROM information_schema.sequences WHERE NOT sequence_schema IN ('pg_catalog', 'information_schema') -UNION SELECT 3 AS index,'ALTER FUNCTION '|| nsp.nspname || '."' || p.proname ||'"('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};' AS statement -FROM pg_proc p JOIN pg_namespace nsp ON p.pronamespace = nsp.oid WHERE NOT nsp.nspname IN ('pg_catalog', 'information_schema') AND p.prokind = 'f' -UNION SELECT 4 AS index,'ALTER PROCEDURE '|| nsp.nspname || '."' || p.proname ||'"('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};' AS statement -FROM pg_proc p JOIN pg_namespace nsp ON p.pronamespace = nsp.oid WHERE NOT nsp.nspname IN ('pg_catalog', 'information_schema') AND p.prokind = 'p' -UNION SELECT 5 AS index,'ALTER AGGREGATE '|| nsp.nspname || '."' || p.proname ||'"('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {};' AS statement -FROM pg_proc p JOIN pg_namespace nsp ON p.pronamespace = nsp.oid WHERE NOT nsp.nspname IN ('pg_catalog', 'information_schema') AND p.prokind = 'a' -UNION SELECT 6 AS index,'ALTER VIEW '|| schemaname || '."' || viewname ||'" OWNER TO {};' AS statement -FROM pg_catalog.pg_views WHERE NOT schemaname IN ('pg_catalog', 'information_schema')) AS statements ORDER BY index) LOOP - EXECUTE format(r.statement); - END LOOP; -END; $$;""" - ).format( - Identifier(user), - Identifier(user), - Identifier(user), - Identifier(user), - Identifier(user), - Identifier(user), - ) - ) - statements.append( - SQL( - "UPDATE pg_catalog.pg_largeobject_metadata\n" - "SET lomowner = (SELECT oid FROM pg_roles WHERE rolname = {})\n" - "WHERE lomowner = (SELECT oid FROM pg_roles WHERE rolname = {});" - ).format(Literal(user), Literal(self.user)) - ) - for schema in schemas: - statements.append( - SQL("ALTER SCHEMA {} OWNER TO {};").format( - Identifier(schema), Identifier(user) - ) - ) - else: - for schema in schemas: - schema = Identifier(schema) - statements.extend([ - SQL("GRANT ALL 
PRIVILEGES ON ALL TABLES IN SCHEMA {} TO {};").format( - schema, Identifier(user) - ), - SQL("GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA {} TO {};").format( - schema, Identifier(user) - ), - SQL("GRANT ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA {} TO {};").format( - schema, Identifier(user) - ), - SQL("GRANT USAGE ON SCHEMA {} TO {};").format(schema, Identifier(user)), - SQL("GRANT CREATE ON SCHEMA {} TO {};").format(schema, Identifier(user)), - ]) - return statements - - def get_last_archived_wal(self) -> str: - """Get the name of the last archived wal for the current PostgreSQL cluster.""" - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute("SELECT last_archived_wal FROM pg_stat_archiver;") - return cursor.fetchone()[0] - except psycopg2.Error as e: - logger.error(f"Failed to get PostgreSQL last archived WAL: {e}") - raise PostgreSQLGetLastArchivedWALError() from e - - def get_current_timeline(self) -> str: - """Get the timeline id for the current PostgreSQL unit.""" - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute("SELECT timeline_id FROM pg_control_checkpoint();") - return cursor.fetchone()[0] - except psycopg2.Error as e: - logger.error(f"Failed to get PostgreSQL current timeline id: {e}") - raise PostgreSQLGetCurrentTimelineError() from e - - def get_postgresql_text_search_configs(self) -> Set[str]: - """Returns the PostgreSQL available text search configs. - - Returns: - Set of PostgreSQL text search configs. - """ - with self._connect_to_database( - database_host=self.current_host - ) as connection, connection.cursor() as cursor: - cursor.execute("SELECT CONCAT('pg_catalog.', cfgname) FROM pg_ts_config;") - text_search_configs = cursor.fetchall() - return {text_search_config[0] for text_search_config in text_search_configs} - - def get_postgresql_timezones(self) -> Set[str]: - """Returns the PostgreSQL available timezones. 
- - Returns: - Set of PostgreSQL timezones. - """ - with self._connect_to_database( - database_host=self.current_host - ) as connection, connection.cursor() as cursor: - cursor.execute("SELECT name FROM pg_timezone_names;") - timezones = cursor.fetchall() - return {timezone[0] for timezone in timezones} - - def get_postgresql_default_table_access_methods(self) -> Set[str]: - """Returns the PostgreSQL available table access methods. - - Returns: - Set of PostgreSQL table access methods. - """ - with self._connect_to_database( - database_host=self.current_host - ) as connection, connection.cursor() as cursor: - cursor.execute("SELECT amname FROM pg_am WHERE amtype = 't';") - access_methods = cursor.fetchall() - return {access_method[0] for access_method in access_methods} - - def get_postgresql_version(self, current_host=True) -> str: - """Returns the PostgreSQL version. - - Returns: - PostgreSQL version number. - """ - host = self.current_host if current_host else None - try: - with self._connect_to_database( - database_host=host - ) as connection, connection.cursor() as cursor: - cursor.execute("SELECT version();") - # Split to get only the version number. - return cursor.fetchone()[0].split(" ")[1] - except psycopg2.Error as e: - logger.error(f"Failed to get PostgreSQL version: {e}") - raise PostgreSQLGetPostgreSQLVersionError() from e - - def is_tls_enabled(self, check_current_host: bool = False) -> bool: - """Returns whether TLS is enabled. - - Args: - check_current_host: whether to check the current host - instead of the primary host. - - Returns: - whether TLS is enabled. - """ - try: - with self._connect_to_database( - database_host=self.current_host if check_current_host else None - ) as connection, connection.cursor() as cursor: - cursor.execute("SHOW ssl;") - return "on" in cursor.fetchone()[0] - except psycopg2.Error: - # Connection errors happen when PostgreSQL has not started yet. 
- return False - - def list_access_groups(self, current_host=False) -> Set[str]: - """Returns the list of PostgreSQL database access groups. - - Args: - current_host: whether to check the current host - instead of the primary host. - - Returns: - List of PostgreSQL database access groups. - """ - connection = None - host = self.current_host if current_host else None - try: - with self._connect_to_database( - database_host=host - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT groname FROM pg_catalog.pg_group WHERE groname LIKE '%_access';" - ) - access_groups = cursor.fetchall() - return {group[0] for group in access_groups} - except psycopg2.Error as e: - logger.error(f"Failed to list PostgreSQL database access groups: {e}") - raise PostgreSQLListGroupsError() from e - finally: - if connection is not None: - connection.close() - - def list_accessible_databases_for_user(self, user: str, current_host=False) -> Set[str]: - """Returns the list of accessible databases for a specific user. - - Args: - user: the user to check. - current_host: whether to check the current host - instead of the primary host. - - Returns: - List of accessible database (the ones where - the user has the CONNECT privilege). 
- """ - connection = None - host = self.current_host if current_host else None - try: - with self._connect_to_database( - database_host=host - ) as connection, connection.cursor() as cursor: - cursor.execute( - SQL( - "SELECT TRUE FROM pg_catalog.pg_user WHERE usename = {} AND usesuper;" - ).format(Literal(user)) - ) - if cursor.fetchone() is not None: - return {"all"} - cursor.execute( - SQL( - "SELECT datname FROM pg_catalog.pg_database WHERE has_database_privilege({}, datname, 'CONNECT') AND NOT datistemplate;" - ).format(Literal(user)) - ) - databases = cursor.fetchall() - return {database[0] for database in databases} - except psycopg2.Error as e: - logger.error(f"Failed to list accessible databases for user {user}: {e}") - raise PostgreSQLListAccessibleDatabasesForUserError() from e - finally: - if connection is not None: - connection.close() - - def list_users(self, group: Optional[str] = None, current_host=False) -> Set[str]: - """Returns the list of PostgreSQL database users. - - Args: - group: optional group to filter the users. - current_host: whether to check the current host - instead of the primary host. - - Returns: - List of PostgreSQL database users. 
- """ - connection = None - host = self.current_host if current_host else None - try: - with self._connect_to_database( - database_host=host - ) as connection, connection.cursor() as cursor: - if group: - query = SQL( - "SELECT usename FROM (SELECT UNNEST(grolist) AS user_id FROM pg_catalog.pg_group WHERE groname = {}) AS g JOIN pg_catalog.pg_user AS u ON g.user_id = u.usesysid;" - ).format(Literal(group)) - else: - query = "SELECT usename FROM pg_catalog.pg_user;" - cursor.execute(query) - usernames = cursor.fetchall() - return {username[0] for username in usernames} - except psycopg2.Error as e: - logger.error(f"Failed to list PostgreSQL database users: {e}") - raise PostgreSQLListUsersError() from e - finally: - if connection is not None: - connection.close() - - def list_users_from_relation(self, current_host=False) -> Set[str]: - """Returns the list of PostgreSQL database users that were created by a relation. - - Args: - current_host: whether to check the current host - instead of the primary host. - - Returns: - List of PostgreSQL database users. - """ - connection = None - host = self.current_host if current_host else None - try: - with self._connect_to_database( - database_host=host - ) as connection, connection.cursor() as cursor: - cursor.execute( - "SELECT usename " - "FROM pg_catalog.pg_user " - "WHERE usename LIKE 'relation_id_%' OR usename LIKE 'relation-%' " - "OR usename LIKE 'pgbouncer_auth_relation_%' OR usename LIKE '%_user_%_%';" - ) - usernames = cursor.fetchall() - return {username[0] for username in usernames} - except psycopg2.Error as e: - logger.error(f"Failed to list PostgreSQL database users: {e}") - raise PostgreSQLListUsersError() from e - finally: - if connection is not None: - connection.close() - - def list_valid_privileges_and_roles(self) -> Tuple[Set[str], Set[str]]: - """Returns two sets with valid privileges and roles. - - Returns: - Tuple containing two sets: the first with valid privileges - and the second with valid roles. 
- """ - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute("SELECT rolname FROM pg_roles;") - return { - "createdb", - "createrole", - "superuser", - }, {role[0] for role in cursor.fetchall() if role[0]} - - def set_up_database(self) -> None: - """Set up postgres database with the right permissions.""" - connection = None - cursor = None - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute("SELECT TRUE FROM pg_roles WHERE rolname='admin';") - if cursor.fetchone() is None: - # Allow access to the postgres database only to the system users. - cursor.execute("REVOKE ALL PRIVILEGES ON DATABASE postgres FROM PUBLIC;") - cursor.execute("REVOKE CREATE ON SCHEMA public FROM PUBLIC;") - for user in self.system_users: - cursor.execute( - SQL("GRANT ALL PRIVILEGES ON DATABASE postgres TO {};").format( - Identifier(user) - ) - ) - self.create_user( - PERMISSIONS_GROUP_ADMIN, - extra_user_roles=["pg_read_all_data", "pg_write_all_data"], - ) - cursor.execute("GRANT CONNECT ON DATABASE postgres TO admin;") - except psycopg2.Error as e: - logger.error(f"Failed to set up databases: {e}") - raise PostgreSQLDatabasesSetupError() from e - finally: - if cursor is not None: - cursor.close() - if connection is not None: - connection.close() - - def update_user_password( - self, username: str, password: str, database_host: Optional[str] = None - ) -> None: - """Update a user password. - - Args: - username: the user to update the password. - password: the new password for the user. - database_host: the host to connect to. - - Raises: - PostgreSQLUpdateUserPasswordError if the password couldn't be changed. 
- """ - connection = None - try: - with self._connect_to_database( - database_host=database_host - ) as connection, connection.cursor() as cursor: - cursor.execute(SQL("BEGIN;")) - cursor.execute(SQL("SET LOCAL log_statement = 'none';")) - cursor.execute( - SQL("ALTER USER {} WITH ENCRYPTED PASSWORD '" + password + "';").format( - Identifier(username) - ) - ) - cursor.execute(SQL("COMMIT;")) - except psycopg2.Error as e: - logger.error(f"Failed to update user password: {e}") - raise PostgreSQLUpdateUserPasswordError() from e - finally: - if connection is not None: - connection.close() - - def is_restart_pending(self) -> bool: - """Query pg_settings for pending restart.""" - connection = None - try: - with self._connect_to_database( - database_host=self.current_host - ) as connection, connection.cursor() as cursor: - cursor.execute("SELECT COUNT(*) FROM pg_settings WHERE pending_restart=True;") - return cursor.fetchone()[0] > 0 - except psycopg2.OperationalError: - logger.warning("Failed to connect to PostgreSQL.") - return False - except psycopg2.Error as e: - logger.error(f"Failed to check if restart is pending: {e}") - return False - finally: - if connection: - connection.close() - - @staticmethod - def build_postgresql_group_map(group_map: Optional[str]) -> List[Tuple]: - """Build the PostgreSQL authorization group-map. - - Args: - group_map: serialized group-map with the following format: - =, - =, - ... - - Returns: - List of LDAP group to PostgreSQL group tuples. 
- """ - if group_map is None: - return [] - - group_mappings = group_map.split(",") - group_mappings = (mapping.strip() for mapping in group_mappings) - group_map_list = [] - - for mapping in group_mappings: - mapping_parts = mapping.split("=") - if len(mapping_parts) != 2: - raise ValueError("The group-map must contain value pairs split by commas") - - ldap_group = mapping_parts[0] - psql_group = mapping_parts[1] - - if psql_group in [*ACCESS_GROUPS, PERMISSIONS_GROUP_ADMIN]: - logger.warning(f"Tried to assign LDAP users to forbidden group: {psql_group}") - continue - - group_map_list.append((ldap_group, psql_group)) - - return group_map_list - - @staticmethod - def build_postgresql_parameters( - config_options: dict, available_memory: int, limit_memory: Optional[int] = None - ) -> Optional[dict]: - """Builds the PostgreSQL parameters. - - Args: - config_options: charm config options containing profile and PostgreSQL parameters. - available_memory: available memory to use in calculation in bytes. - limit_memory: (optional) limit memory to use in calculation in bytes. - - Returns: - Dictionary with the PostgreSQL parameters. - """ - if limit_memory: - available_memory = min(available_memory, limit_memory) - profile = config_options["profile"] - logger.debug(f"Building PostgreSQL parameters for {profile=} and {available_memory=}") - parameters = {} - for config, value in config_options.items(): - # Filter config option not related to PostgreSQL parameters. - if not config.startswith(( - "connection", - "cpu", - "durability", - "instance", - "logging", - "memory", - "optimizer", - "request", - "response", - "session", - "storage", - "vacuum", - )): - continue - parameter = "_".join(config.split("_")[1:]) - if parameter in ["date_style", "time_zone"]: - parameter = "".join(x.capitalize() for x in parameter.split("_")) - elif parameter.startswith("pg_stat_statements"): - parameter = "pg_stat_statements." 
+ parameter.removeprefix("pg_stat_statements_") - elif parameter == "maximum_lag_on_failover": - continue - parameters[parameter] = value - shared_buffers_max_value_in_mb = int(available_memory * 0.4 / 10**6) - shared_buffers_max_value = int(shared_buffers_max_value_in_mb * 10**3 / 8) - if parameters.get("shared_buffers", 0) > shared_buffers_max_value: - raise Exception( - f"Shared buffers config option should be at most 40% of the available memory, which is {shared_buffers_max_value_in_mb}MB" - ) - if profile == "production": - if "shared_buffers" in parameters: - # Convert to bytes to use in the calculation. - shared_buffers = parameters["shared_buffers"] * 8 * 10**3 - else: - # Use 25% of the available memory for shared_buffers. - # and the remaining as cache memory. - shared_buffers = int(available_memory * 0.25) - parameters["shared_buffers"] = f"{int(shared_buffers * 128 / 10**6)}" - effective_cache_size = int(available_memory - shared_buffers) - parameters.update({ - "effective_cache_size": f"{int(effective_cache_size / 10**6) * 128}" - }) - return parameters - - def validate_date_style(self, date_style: str) -> bool: - """Validate a date style against PostgreSQL. - - Returns: - Whether the date style is valid. - """ - try: - with self._connect_to_database( - database_host=self.current_host - ) as connection, connection.cursor() as cursor: - cursor.execute( - SQL( - "SET DateStyle to {};", - ).format(Identifier(date_style)) - ) - return True - except psycopg2.Error: - return False - - def validate_group_map(self, group_map: Optional[str]) -> bool: - """Validate the PostgreSQL authorization group-map. - - Args: - group_map: serialized group-map with the following format: - =, - =, - ... - - Returns: - Whether the group-map is valid. 
- """ - if group_map is None: - return True - - try: - group_map = self.build_postgresql_group_map(group_map) - except ValueError: - return False - - for _, psql_group in group_map: - with self._connect_to_database() as connection, connection.cursor() as cursor: - query = SQL("SELECT TRUE FROM pg_roles WHERE rolname={};") - query = query.format(Literal(psql_group)) - cursor.execute(query) - - if cursor.fetchone() is None: - return False - - return True - - def is_user_in_hba(self, username: str) -> bool: - """Check if user was added in pg_hba.""" - connection = None - try: - with self._connect_to_database() as connection, connection.cursor() as cursor: - cursor.execute( - SQL( - "SELECT COUNT(*) FROM pg_hba_file_rules WHERE {} = ANY(user_name);" - ).format(Literal(username)) - ) - return cursor.fetchone()[0] > 0 - except psycopg2.Error as e: - logger.debug(f"Failed to check pg_hba: {e}") - return False - finally: - if connection: - connection.close() From 8b4c5f601fb6ab58a4b8ba4202ab0f4d124f8352 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 14:42:36 +0800 Subject: [PATCH 23/29] refactor: address PR review comments - Rename charms/garm-operator/ -> charms/garm/ (redundant -operator suffix) - Remove [[provider]] section from render_garm_toml() and its unit test; OpenStack provider config is out of scope for this scaffold - Remove unused OPENSTACK_PROVIDER_BINARY constant - Clarify _ensure_secrets() docstring: existing secret is re-used on redeploy so GARM keeps the same JWT secret across restarts - Update charms_lint_and_unit.yaml CI matrix path Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/charms_lint_and_unit.yaml | 2 +- charms/{garm-operator => garm}/.gitignore | 0 .../{garm-operator => garm}/charmcraft.yaml | 0 .../data_platform_libs/v0/data_interfaces.py | 0 .../grafana_k8s/v0/grafana_dashboard.py | 0 .../lib/charms/loki_k8s/v1/loki_push_api.py | 0 .../observability_libs/v0/juju_topology.py | 0 
.../prometheus_k8s/v0/prometheus_scrape.py | 0 .../lib/charms/redis_k8s/v0/redis.py | 0 .../tempo_coordinator_k8s/v0/tracing.py | 0 .../lib/charms/traefik_k8s/v2/ingress.py | 0 charms/{garm-operator => garm}/pyproject.toml | 0 .../{garm-operator => garm}/requirements.txt | 0 charms/{garm-operator => garm}/src/charm.py | 22 +++++------------- .../{garm-operator => garm}/terraform/main.tf | 0 .../terraform/outputs.tf | 0 .../terraform/variables.tf | 0 .../terraform/versions.tf | 0 .../tests/unit/test_charm.py | 23 ------------------- charms/{garm-operator => garm}/tox.toml | 0 20 files changed, 7 insertions(+), 40 deletions(-) rename charms/{garm-operator => garm}/.gitignore (100%) rename charms/{garm-operator => garm}/charmcraft.yaml (100%) rename charms/{garm-operator => garm}/lib/charms/data_platform_libs/v0/data_interfaces.py (100%) rename charms/{garm-operator => garm}/lib/charms/grafana_k8s/v0/grafana_dashboard.py (100%) rename charms/{garm-operator => garm}/lib/charms/loki_k8s/v1/loki_push_api.py (100%) rename charms/{garm-operator => garm}/lib/charms/observability_libs/v0/juju_topology.py (100%) rename charms/{garm-operator => garm}/lib/charms/prometheus_k8s/v0/prometheus_scrape.py (100%) rename charms/{garm-operator => garm}/lib/charms/redis_k8s/v0/redis.py (100%) rename charms/{garm-operator => garm}/lib/charms/tempo_coordinator_k8s/v0/tracing.py (100%) rename charms/{garm-operator => garm}/lib/charms/traefik_k8s/v2/ingress.py (100%) rename charms/{garm-operator => garm}/pyproject.toml (100%) rename charms/{garm-operator => garm}/requirements.txt (100%) rename charms/{garm-operator => garm}/src/charm.py (87%) rename charms/{garm-operator => garm}/terraform/main.tf (100%) rename charms/{garm-operator => garm}/terraform/outputs.tf (100%) rename charms/{garm-operator => garm}/terraform/variables.tf (100%) rename charms/{garm-operator => garm}/terraform/versions.tf (100%) rename charms/{garm-operator => garm}/tests/unit/test_charm.py (82%) rename 
charms/{garm-operator => garm}/tox.toml (100%) diff --git a/.github/workflows/charms_lint_and_unit.yaml b/.github/workflows/charms_lint_and_unit.yaml index 1cba77fd..8e2e117b 100644 --- a/.github/workflows/charms_lint_and_unit.yaml +++ b/.github/workflows/charms_lint_and_unit.yaml @@ -35,7 +35,7 @@ jobs: charm: - charms/planner-operator - charms/webhook-gateway-operator - - charms/garm-operator + - charms/garm steps: - uses: actions/checkout@v6 diff --git a/charms/garm-operator/.gitignore b/charms/garm/.gitignore similarity index 100% rename from charms/garm-operator/.gitignore rename to charms/garm/.gitignore diff --git a/charms/garm-operator/charmcraft.yaml b/charms/garm/charmcraft.yaml similarity index 100% rename from charms/garm-operator/charmcraft.yaml rename to charms/garm/charmcraft.yaml diff --git a/charms/garm-operator/lib/charms/data_platform_libs/v0/data_interfaces.py b/charms/garm/lib/charms/data_platform_libs/v0/data_interfaces.py similarity index 100% rename from charms/garm-operator/lib/charms/data_platform_libs/v0/data_interfaces.py rename to charms/garm/lib/charms/data_platform_libs/v0/data_interfaces.py diff --git a/charms/garm-operator/lib/charms/grafana_k8s/v0/grafana_dashboard.py b/charms/garm/lib/charms/grafana_k8s/v0/grafana_dashboard.py similarity index 100% rename from charms/garm-operator/lib/charms/grafana_k8s/v0/grafana_dashboard.py rename to charms/garm/lib/charms/grafana_k8s/v0/grafana_dashboard.py diff --git a/charms/garm-operator/lib/charms/loki_k8s/v1/loki_push_api.py b/charms/garm/lib/charms/loki_k8s/v1/loki_push_api.py similarity index 100% rename from charms/garm-operator/lib/charms/loki_k8s/v1/loki_push_api.py rename to charms/garm/lib/charms/loki_k8s/v1/loki_push_api.py diff --git a/charms/garm-operator/lib/charms/observability_libs/v0/juju_topology.py b/charms/garm/lib/charms/observability_libs/v0/juju_topology.py similarity index 100% rename from charms/garm-operator/lib/charms/observability_libs/v0/juju_topology.py rename 
to charms/garm/lib/charms/observability_libs/v0/juju_topology.py diff --git a/charms/garm-operator/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/charms/garm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py similarity index 100% rename from charms/garm-operator/lib/charms/prometheus_k8s/v0/prometheus_scrape.py rename to charms/garm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py diff --git a/charms/garm-operator/lib/charms/redis_k8s/v0/redis.py b/charms/garm/lib/charms/redis_k8s/v0/redis.py similarity index 100% rename from charms/garm-operator/lib/charms/redis_k8s/v0/redis.py rename to charms/garm/lib/charms/redis_k8s/v0/redis.py diff --git a/charms/garm-operator/lib/charms/tempo_coordinator_k8s/v0/tracing.py b/charms/garm/lib/charms/tempo_coordinator_k8s/v0/tracing.py similarity index 100% rename from charms/garm-operator/lib/charms/tempo_coordinator_k8s/v0/tracing.py rename to charms/garm/lib/charms/tempo_coordinator_k8s/v0/tracing.py diff --git a/charms/garm-operator/lib/charms/traefik_k8s/v2/ingress.py b/charms/garm/lib/charms/traefik_k8s/v2/ingress.py similarity index 100% rename from charms/garm-operator/lib/charms/traefik_k8s/v2/ingress.py rename to charms/garm/lib/charms/traefik_k8s/v2/ingress.py diff --git a/charms/garm-operator/pyproject.toml b/charms/garm/pyproject.toml similarity index 100% rename from charms/garm-operator/pyproject.toml rename to charms/garm/pyproject.toml diff --git a/charms/garm-operator/requirements.txt b/charms/garm/requirements.txt similarity index 100% rename from charms/garm-operator/requirements.txt rename to charms/garm/requirements.txt diff --git a/charms/garm-operator/src/charm.py b/charms/garm/src/charm.py similarity index 87% rename from charms/garm-operator/src/charm.py rename to charms/garm/src/charm.py index 694cdda9..ad935c52 100644 --- a/charms/garm-operator/src/charm.py +++ b/charms/garm/src/charm.py @@ -19,7 +19,6 @@ CONTAINER_NAME: typing.Final[str] = "app" PEBBLE_SERVICE_NAME: typing.Final[str] = "app" 
GARM_BINARY: typing.Final[str] = "/usr/local/bin/garm" -OPENSTACK_PROVIDER_BINARY: typing.Final[str] = "/usr/local/bin/garm-provider-openstack" def render_garm_toml( @@ -58,20 +57,6 @@ def render_garm_toml( "disable_auth": True, "enable": True, }, - "provider": [ - { - "name": "openstack", - "provider_type": "external", - "description": "OpenStack provider", - "external": { - # Scaffold: OpenStack config path (populated in future story) - "config_file": "", - "provider_executable": OPENSTACK_PROVIDER_BINARY, - # Scaffold: OpenStack environment variables (populated in future story) - "environment_variables": [], - }, - } - ], } return tomli_w.dumps(config) @@ -142,7 +127,12 @@ def restart(self, rerun_migrations: bool = False) -> None: container.replan() def _ensure_secrets(self) -> None: - """Create the garm-secrets juju secret on first call (leader only).""" + """Create or refresh the garm-secrets Juju secret (leader only). + + On initial deploy the secret is created. On redeploy the existing + secret is re-used; its content is left unchanged so GARM keeps the + same JWT secret across restarts. 
+ """ if not self.unit.is_leader(): return try: diff --git a/charms/garm-operator/terraform/main.tf b/charms/garm/terraform/main.tf similarity index 100% rename from charms/garm-operator/terraform/main.tf rename to charms/garm/terraform/main.tf diff --git a/charms/garm-operator/terraform/outputs.tf b/charms/garm/terraform/outputs.tf similarity index 100% rename from charms/garm-operator/terraform/outputs.tf rename to charms/garm/terraform/outputs.tf diff --git a/charms/garm-operator/terraform/variables.tf b/charms/garm/terraform/variables.tf similarity index 100% rename from charms/garm-operator/terraform/variables.tf rename to charms/garm/terraform/variables.tf diff --git a/charms/garm-operator/terraform/versions.tf b/charms/garm/terraform/versions.tf similarity index 100% rename from charms/garm-operator/terraform/versions.tf rename to charms/garm/terraform/versions.tf diff --git a/charms/garm-operator/tests/unit/test_charm.py b/charms/garm/tests/unit/test_charm.py similarity index 82% rename from charms/garm-operator/tests/unit/test_charm.py rename to charms/garm/tests/unit/test_charm.py index 89d7ca62..36a50e69 100644 --- a/charms/garm-operator/tests/unit/test_charm.py +++ b/charms/garm/tests/unit/test_charm.py @@ -80,29 +80,6 @@ def test_render_garm_toml_metrics_section(): assert parsed["metrics"]["enable"] is True -def test_render_garm_toml_provider_section(): - """ - arrange: Any valid config inputs. - act: Render the GARM TOML config. - assert: The [[provider]] section has the OpenStack provider binary. 
- """ - result = render_garm_toml( - listen_address="0.0.0.0", - listen_port=9997, - db_path="/srv/garm/data/garm.db", - jwt_secret="abc123", - ) - parsed = tomllib.loads(result) - assert len(parsed["provider"]) == 1 - provider = parsed["provider"][0] - assert provider["name"] == "openstack" - assert provider["provider_type"] == "external" - assert ( - provider["external"]["provider_executable"] - == "/usr/local/bin/garm-provider-openstack" - ) - - # --------------------------------------------------------------------------- # Secret management tests (no Harness needed — test the helper directly) # --------------------------------------------------------------------------- diff --git a/charms/garm-operator/tox.toml b/charms/garm/tox.toml similarity index 100% rename from charms/garm-operator/tox.toml rename to charms/garm/tox.toml From 1d9989863000ab305b407f130fc045841a230b97 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 15:06:18 +0800 Subject: [PATCH 24/29] revert: restore provider section and OPENSTACK_PROVIDER_BINARY Restore [[provider]] outline in render_garm_toml(), OPENSTACK_PROVIDER_BINARY constant, provider unit test, and original _ensure_secrets() docstring. These are kept as scaffold outlines per yanksyoon's feedback. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm/src/charm.py | 20 ++++++++++++++------ charms/garm/tests/unit/test_charm.py | 23 +++++++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/charms/garm/src/charm.py b/charms/garm/src/charm.py index ad935c52..98189e74 100644 --- a/charms/garm/src/charm.py +++ b/charms/garm/src/charm.py @@ -19,6 +19,7 @@ CONTAINER_NAME: typing.Final[str] = "app" PEBBLE_SERVICE_NAME: typing.Final[str] = "app" GARM_BINARY: typing.Final[str] = "/usr/local/bin/garm" +OPENSTACK_PROVIDER_BINARY: typing.Final[str] = "/usr/local/bin/garm-provider-openstack" def render_garm_toml( @@ -57,6 +58,18 @@ def render_garm_toml( "disable_auth": True, "enable": True, }, + "provider": [ + { + "name": "openstack", + "provider_type": "external", + "description": "OpenStack provider", + "external": { + "config_file": "", + "provider_executable": OPENSTACK_PROVIDER_BINARY, + "environment_variables": [], + }, + } + ], } return tomli_w.dumps(config) @@ -127,12 +140,7 @@ def restart(self, rerun_migrations: bool = False) -> None: container.replan() def _ensure_secrets(self) -> None: - """Create or refresh the garm-secrets Juju secret (leader only). - - On initial deploy the secret is created. On redeploy the existing - secret is re-used; its content is left unchanged so GARM keeps the - same JWT secret across restarts. - """ + """Create the garm-secrets juju secret on first call (leader only).""" if not self.unit.is_leader(): return try: diff --git a/charms/garm/tests/unit/test_charm.py b/charms/garm/tests/unit/test_charm.py index 36a50e69..5df1aa05 100644 --- a/charms/garm/tests/unit/test_charm.py +++ b/charms/garm/tests/unit/test_charm.py @@ -80,6 +80,29 @@ def test_render_garm_toml_metrics_section(): assert parsed["metrics"]["enable"] is True +def test_render_garm_toml_provider_section(): + """ + arrange: Any valid config inputs. + act: Render the GARM TOML config. 
+ assert: The [[provider]] section has the OpenStack provider binary. + """ + result = render_garm_toml( + listen_address="0.0.0.0", + listen_port=9997, + db_path="/etc/garm/garm.db", + jwt_secret="abc123", + ) + parsed = tomllib.loads(result) + assert len(parsed["provider"]) == 1 + provider = parsed["provider"][0] + assert provider["name"] == "openstack" + assert provider["provider_type"] == "external" + assert ( + provider["external"]["provider_executable"] + == "/usr/local/bin/garm-provider-openstack" + ) + + # --------------------------------------------------------------------------- # Secret management tests (no Harness needed — test the helper directly) # --------------------------------------------------------------------------- From 52b1cc52030c30af9d3b3694e78cac4f71ec97a1 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 15:08:02 +0800 Subject: [PATCH 25/29] chore: bump build-base to ubuntu@26.04 (Resolute) Test whether charmcraft/rockcraft support Ubuntu 26.04 end-to-end. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm/charmcraft.yaml | 2 +- garm-rockcraft.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charms/garm/charmcraft.yaml b/charms/garm/charmcraft.yaml index 36595241..6c468985 100644 --- a/charms/garm/charmcraft.yaml +++ b/charms/garm/charmcraft.yaml @@ -5,7 +5,7 @@ name: github-runner-garm type: charm -base: ubuntu@24.04 +base: ubuntu@26.04 platforms: amd64: diff --git a/garm-rockcraft.yaml b/garm-rockcraft.yaml index 42106538..6619efd7 100644 --- a/garm-rockcraft.yaml +++ b/garm-rockcraft.yaml @@ -3,7 +3,7 @@ name: garm base: bare -build-base: ubuntu@24.04 +build-base: ubuntu@26.04 version: "0.1" summary: GARM — GitHub Actions Runner Manager description: | From 156d8c0dcca00393720db91bc8c137e9436340eb Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 15:48:55 +0800 Subject: [PATCH 26/29] revert: revert build-base to ubuntu@24.04 Ubuntu 26.04 (Resolute) is not yet supported by the toolchain. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm/charmcraft.yaml | 2 +- garm-rockcraft.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/charms/garm/charmcraft.yaml b/charms/garm/charmcraft.yaml index 6c468985..36595241 100644 --- a/charms/garm/charmcraft.yaml +++ b/charms/garm/charmcraft.yaml @@ -5,7 +5,7 @@ name: github-runner-garm type: charm -base: ubuntu@26.04 +base: ubuntu@24.04 platforms: amd64: diff --git a/garm-rockcraft.yaml b/garm-rockcraft.yaml index 6619efd7..42106538 100644 --- a/garm-rockcraft.yaml +++ b/garm-rockcraft.yaml @@ -3,7 +3,7 @@ name: garm base: bare -build-base: ubuntu@26.04 +build-base: ubuntu@24.04 version: "0.1" summary: GARM — GitHub Actions Runner Manager description: | From 47b66595f8a5a68d578fbee5c3aefb7b721487a5 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 17:30:48 +0800 Subject: [PATCH 27/29] charms/garm: rename Charmhub charm name to garm Remove the github-runner- prefix from the Charmhub charm name and the Terraform default app_name. GARM already stands for GitHub Actions Runner Manager, so the prefix is redundant. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/garm/charmcraft.yaml | 2 +- charms/garm/terraform/main.tf | 2 +- charms/garm/terraform/variables.tf | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charms/garm/charmcraft.yaml b/charms/garm/charmcraft.yaml index 36595241..50f0c0e4 100644 --- a/charms/garm/charmcraft.yaml +++ b/charms/garm/charmcraft.yaml @@ -1,7 +1,7 @@ # Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. 
-name: github-runner-garm +name: garm type: charm diff --git a/charms/garm/terraform/main.tf b/charms/garm/terraform/main.tf index 76417fb0..058ea858 100644 --- a/charms/garm/terraform/main.tf +++ b/charms/garm/terraform/main.tf @@ -6,7 +6,7 @@ resource "juju_application" "github_runner_garm" { model_uuid = var.model_uuid charm { - name = "github-runner-garm" + name = "garm" channel = var.channel revision = var.revision base = var.base diff --git a/charms/garm/terraform/variables.tf b/charms/garm/terraform/variables.tf index cc29e0c9..2fe5338c 100644 --- a/charms/garm/terraform/variables.tf +++ b/charms/garm/terraform/variables.tf @@ -4,7 +4,7 @@ variable "app_name" { description = "Name of the application in the Juju model." type = string - default = "github-runner-garm" + default = "garm" } variable "base" { @@ -20,7 +20,7 @@ variable "channel" { } variable "config" { - description = "Application config. See charmhub.io/github-runner-garm/configurations." + description = "Application config. See charmhub.io/garm/configurations." type = map(string) default = {} } From 07c8c5047f43be2233090b260af0b02b9aa568e4 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 18:01:42 +0800 Subject: [PATCH 28/29] ci: trigger CI run Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> From de199f5b3dc5e5fb9811356ff7786aa0ecd3b3a0 Mon Sep 17 00:00:00 2001 From: charlie4284 Date: Mon, 25 May 2026 20:53:52 +0800 Subject: [PATCH 29/29] charms/tests: pre-pull GARM ROCK image and add debug logging The GARM ROCK contains two large statically-linked Go binaries (garm + garm-provider-openstack) making it significantly larger than other charm images. The 600s juju.wait() was expiring while microk8s was still pulling the image from ghcr.io. Fix: call 'microk8s.ctr images pull' before juju.deploy() to warm the local containerd cache. A pull failure is logged as a warning (not fatal) since juju will retry on its own. 
Debug logging added throughout: - garm_app_image: log the resolved image reference - _pre_pull_garm_image: log pull start, completion, and failures - deploy_garm_app_fixture: log deploy args, wait start, and timeout - _collect_debug_info: on TimeoutError, dump kubectl pods/describe/events - test_garm.py: INFO logs at each test action point Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- charms/tests/integration/conftest.py | 76 ++++++++++++++++++++++++--- charms/tests/integration/test_garm.py | 16 ++++-- 2 files changed, 83 insertions(+), 9 deletions(-) diff --git a/charms/tests/integration/conftest.py b/charms/tests/integration/conftest.py index d2ffa1d5..3d44143c 100644 --- a/charms/tests/integration/conftest.py +++ b/charms/tests/integration/conftest.py @@ -4,6 +4,7 @@ import logging import secrets import string +import subprocess import textwrap from typing import Iterator @@ -326,7 +327,50 @@ def garm_charm_file_fixture(pytestconfig: pytest.Config) -> str | None: @pytest.fixture(name="garm_app_image", scope="module") def garm_app_image_fixture(pytestconfig: pytest.Config) -> str | None: """Return the GARM OCI image reference for the app-image resource.""" - return pytestconfig.getoption(GARM_IMAGE_PARAM) + image = pytestconfig.getoption(GARM_IMAGE_PARAM) + logger.info("GARM app image: %s", image) + return image + + +def _pre_pull_garm_image(image: str) -> None: + """Pre-pull the GARM ROCK image into microk8s containerd. + + The GARM ROCK contains two large statically-linked Go binaries, making it + significantly larger than other charm images. Pre-pulling into the local + containerd cache before deploying prevents the 600s juju.wait() from + expiring while the pod is still downloading the image. 
+ """ + logger.info("Pre-pulling GARM ROCK image into microk8s containerd: %s", image) + try: + result = subprocess.run( + ["sudo", "microk8s.ctr", "images", "pull", image], + check=True, + capture_output=True, + text=True, + timeout=600, + ) + logger.info("GARM image pre-pull complete.\n%s", result.stdout) + except subprocess.CalledProcessError as e: + logger.warning( + "GARM image pre-pull failed (deploy will retry): stderr=%s", e.stderr + ) + except subprocess.TimeoutExpired: + logger.warning("GARM image pre-pull timed out after 600s; proceeding anyway") + + +def _collect_debug_info(app_name: str) -> None: + """Collect k8s and Juju debug information after a deployment failure.""" + logger.error("=== Debug info for failed GARM deployment ===") + for cmd in [ + ["sudo", "microk8s.kubectl", "get", "pods", "-A", "-o", "wide"], + ["sudo", "microk8s.kubectl", "describe", "pods", "-l", f"app.kubernetes.io/name={app_name}"], + ["sudo", "microk8s.kubectl", "get", "events", "--sort-by=.lastTimestamp"], + ]: + try: + out = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + logger.error("$ %s\n%s%s", " ".join(cmd), out.stdout, out.stderr) + except Exception as exc: + logger.error("Failed to run %s: %s", cmd, exc) @pytest.fixture(scope="module", name="garm_app") @@ -337,6 +381,8 @@ def deploy_garm_app_fixture( ) -> str: """Deploy the GARM application and wait for it to become active. + - Pre-pulls the ROCK image into microk8s containerd to avoid image-pull + timeouts during juju.wait() (the GARM ROCK is large: two static Go binaries). - Deploys the GARM charm with the provided ROCK image as the app-image resource. - Waits for the application to reach active status. 
GARM requires no external integrations at the scaffold stage: secrets are auto-generated by the leader @@ -346,14 +392,32 @@ def deploy_garm_app_fixture( """ app_name = "github-runner-garm" + if garm_app_image: + _pre_pull_garm_image(garm_app_image) + + logger.info( + "Deploying GARM charm: charm_file=%s image=%s app=%s", + garm_charm_file, + garm_app_image, + app_name, + ) juju.deploy( charm=garm_charm_file, app=app_name, resources={"app-image": garm_app_image}, ) - juju.wait( - lambda status: jubilant.all_active(status, app_name), - timeout=10 * 60, - delay=10, - ) + + logger.info("Waiting up to 600s for GARM app '%s' to reach active status", app_name) + try: + juju.wait( + lambda status: jubilant.all_active(status, app_name), + timeout=10 * 60, + delay=10, + ) + except TimeoutError: + logger.error("GARM app '%s' did not reach active status within 600s", app_name) + _collect_debug_info(app_name) + raise + + logger.info("GARM app '%s' is active", app_name) return app_name diff --git a/charms/tests/integration/test_garm.py b/charms/tests/integration/test_garm.py index 399f9a88..6faf1ad3 100644 --- a/charms/tests/integration/test_garm.py +++ b/charms/tests/integration/test_garm.py @@ -4,10 +4,13 @@ """Integration tests for the GARM charm.""" import json +import logging import jubilant import pytest +logger = logging.getLogger(__name__) + GARM_BINARY = "/usr/local/bin/garm" GARM_PROVIDER_BINARY = "/usr/local/bin/garm-provider-openstack" GARM_CONFIG_PATH = "/etc/garm/config.toml" @@ -24,13 +27,14 @@ def test_garm_rock_contains_binaries( assert: Both the GARM server binary and the OpenStack provider binary are present. 
""" unit = f"{garm_app}/0" + logger.info("Checking GARM binaries in unit %s", unit) result = juju.exec(unit, ["ls", GARM_BINARY, GARM_PROVIDER_BINARY]) assert result.return_code == 0, ( f"Expected GARM binaries at {GARM_BINARY} and {GARM_PROVIDER_BINARY}, " f"got: {result.stderr}" ) - + logger.info("GARM binaries confirmed present: %s", result.stdout.strip()) def test_garm_charm_reaches_active( juju: jubilant.Juju, @@ -42,10 +46,11 @@ def test_garm_charm_reaches_active( assert: The application is in active status, confirming a successful install. """ status = juju.status() + current = status.apps[garm_app].app_status.current + logger.info("GARM app status: %s", current) assert jubilant.all_active(status, garm_app), ( - f"Expected {garm_app} to be active, got: " - f"{status.apps[garm_app].app_status.current}" + f"Expected {garm_app} to be active, got: {current}" ) @@ -59,10 +64,12 @@ def test_garm_pebble_service_command( assert: The Pebble service runs the GARM binary with the canonical config flag. """ unit = f"{garm_app}/0" + logger.info("Reading Pebble plan from unit %s", unit) result = juju.exec(unit, ["pebble", "plan"]) assert result.return_code == 0, f"pebble plan failed: {result.stderr}" plan_output = result.stdout + logger.info("Pebble plan:\n%s", plan_output) assert GARM_BINARY in plan_output, ( f"Expected {GARM_BINARY} in pebble plan, got: {plan_output}" ) @@ -80,6 +87,7 @@ def test_garm_juju_secret_has_expected_keys( act: List Juju secrets and show the garm-secrets secret content. assert: The garm-secrets secret contains the jwt-secret key. 
""" + logger.info("Listing Juju secrets to find '%s'", GARM_SECRETS_LABEL) secrets_json = juju.cli("secrets", "--format=json") secrets = json.loads(secrets_json) @@ -89,6 +97,7 @@ def test_garm_juju_secret_has_expected_keys( garm_secret_uri = uri break + logger.info("Found GARM secret URI: %s", garm_secret_uri) assert garm_secret_uri is not None, ( f"Expected a Juju secret labelled '{GARM_SECRETS_LABEL}' to exist" ) @@ -96,6 +105,7 @@ def test_garm_juju_secret_has_expected_keys( secret_json = juju.cli("show-secret", "--reveal", "--format=json", garm_secret_uri) secret = json.loads(secret_json) content = secret[garm_secret_uri]["content"]["Data"] + logger.info("GARM secret keys: %s", list(content)) assert "jwt-secret" in content, ( f"Expected 'jwt-secret' key in {GARM_SECRETS_LABEL}, got keys: {list(content)}"