Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions refresh_versions.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
charm_major = 1
workload = "16.13"
workload = "16.14"

[snap]
name = "charmed-postgresql"

[snap.revisions]
# amd64
x86_64 = "332"
x86_64 = "360"
# arm64
aarch64 = "331"
aarch64 = "359"
11 changes: 2 additions & 9 deletions spread.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ backends:
CONCIERGE_EXTRA_SNAPS: charmcraft
CONCIERGE_EXTRA_DEBS: pipx
systems:
- ubuntu-22.04:
- ubuntu-24.04:
username: runner
prepare: |
systemctl disable --now unattended-upgrades.service
Expand Down Expand Up @@ -89,15 +89,8 @@ backends:
# Manually pass specific environment variables
environment:
CI: '$(HOST: echo $CI)'
AWS_ACCESS_KEY: '$(HOST: echo $AWS_ACCESS_KEY)'
AWS_SECRET_KEY: '$(HOST: echo $AWS_SECRET_KEY)'
GCP_ACCESS_KEY: '$(HOST: echo $GCP_ACCESS_KEY)'
GCP_SECRET_KEY: '$(HOST: echo $GCP_SECRET_KEY)'
UBUNTU_PRO_TOKEN: '$(HOST: echo $UBUNTU_PRO_TOKEN)'
LANDSCAPE_ACCOUNT_NAME: '$(HOST: echo $LANDSCAPE_ACCOUNT_NAME)'
LANDSCAPE_REGISTRATION_KEY: '$(HOST: echo $LANDSCAPE_REGISTRATION_KEY)'
systems:
- ubuntu-22.04:
- ubuntu-24.04:
username: runner
- ubuntu-24.04-arm:
username: runner
Expand Down
18 changes: 18 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# See LICENSE file for licensing details.
import logging

import jubilant
import pytest

from . import architecture
Expand All @@ -15,3 +16,20 @@ def charm():
# juju bundle files expect local charms to begin with `./` or `/` to distinguish them from
# Charmhub charms.
return f"./postgresql-watcher_ubuntu@24.04-{architecture.architecture}.charm"


@pytest.fixture(scope="module")
def juju(request: pytest.FixtureRequest):
    """Yield a module-scoped :class:`jubilant.Juju` instance for the tests.

    Two pytest command line options are read. They are not registered by this
    fixture, so they are expected to be declared elsewhere (for example in a
    ``pytest_addoption`` hook):

    * ``--model``: when set, the tests run against that existing model and
      this fixture neither creates nor destroys anything.
    * ``--keep-models``: when no model is given, a temporary model is created
      through :func:`jubilant.temp_model` and this flag is forwarded as its
      ``keep`` argument.
    """
    model = request.config.getoption("--model")
    keep_models = bool(request.config.getoption("--keep-models"))

    if model:
        # Reuse the caller-supplied model; its lifecycle is not managed here.
        yield jubilant.Juju(model=model)
    else:
        with jubilant.temp_model(keep=keep_models) as temp_juju:
            yield temp_juju
21 changes: 13 additions & 8 deletions tests/integration/ha_tests/test_stereo_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
import logging

import pytest
from constants import RAFT_PARTNER_PREFIX
from pysyncobj.utility import TcpUtility
from pytest_operator.plugin import OpsTest
from tenacity import Retrying, stop_after_delay, wait_fixed
from yaml import safe_load

from ..helpers import APPLICATION_NAME, DATABASE_APP_NAME
from constants import RAFT_PARTNER_PREFIX

from ..helpers import APPLICATION_NAME, DATABASE_APP_NAME, get_machine_from_unit, stop_machine
from .helpers import APPLICATION_NAME as TEST_APP_NAME
from .helpers import (
are_writes_increasing,
Expand Down Expand Up @@ -260,6 +261,7 @@ async def test_replica_shutdown_with_watcher(ops_test: OpsTest, continuous_write
logger.info(f"Shutting down replica: {replica}")

# Shutdown the replica
await stop_machine(ops_test, await get_machine_from_unit(ops_test, replica))
await ops_test.model.destroy_unit(replica, force=True, destroy_storage=False, max_wait=1500)

# Wait for the cluster to stabilize after unit removal
Expand Down Expand Up @@ -339,6 +341,7 @@ async def test_primary_shutdown_with_watcher(ops_test: OpsTest, continuous_write
logger.info(f"Shutting down primary: {original_primary}")

# Shutdown the primary
await stop_machine(ops_test, await get_machine_from_unit(ops_test, original_primary))
await ops_test.model.destroy_unit(
original_primary, force=True, destroy_storage=False, max_wait=1500
)
Expand Down Expand Up @@ -433,6 +436,7 @@ async def test_watcher_shutdown_no_outage(ops_test: OpsTest, continuous_writes)

# Remove the watcher
watcher_unit = ops_test.model.applications[WATCHER_APP_NAME].units[0]
await stop_machine(ops_test, await get_machine_from_unit(ops_test, watcher_unit.name))
await ops_test.model.destroy_unit(watcher_unit.name, force=True, max_wait=300)

# Verify writes continue without interruption
Expand Down Expand Up @@ -519,12 +523,13 @@ async def test_primary_network_isolation_with_watcher(
# Wait for cluster to stabilize with restored network
# The old primary may take time to rejoin after getting a new IP address,
# so we use raise_on_error=False and wait longer
await ops_test.model.wait_for_idle(
apps=[DATABASE_APP_NAME],
timeout=900,
idle_period=30,
raise_on_error=False, # Old primary may be in error while rejoining
)
async with ops_test.fast_forward(fast_interval="60s"):
await ops_test.model.wait_for_idle(
apps=[DATABASE_APP_NAME],
timeout=900,
idle_period=30,
raise_on_error=False, # Old primary may be in error while rejoining
)

# Wait for the old primary to rejoin as replica
# This can take a while as it needs to recover with a new IP
Expand Down
46 changes: 45 additions & 1 deletion tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
import itertools
import json
import logging
import subprocess
from pathlib import Path

import psycopg2
import requests
import yaml
from constants import PEER
from juju.model import Model
from pytest_operator.plugin import OpsTest
from tenacity import (
Expand All @@ -18,6 +18,8 @@
wait_exponential,
)

from constants import PEER

CHARM_BASE = "ubuntu@22.04"
METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
DATABASE_APP_NAME = "postgresql"
Expand Down Expand Up @@ -194,3 +196,45 @@ async def run_command_on_unit(ops_test: OpsTest, unit_name: str, command: str) -
f"Expected command '{command}' to succeed instead it failed: {return_code}"
)
return stdout


async def stop_machine(ops_test: OpsTest, machine_name: str) -> None:
    """Stop the machine that a unit runs on.

    Runs ``lxc stop <machine_name>`` on the host executing the tests. The call
    is synchronous: it returns once ``lxc`` exits.

    Args:
        ops_test: The ops test framework instance (not used by this helper)
        machine_name: The name of the machine to stop

    Raises:
        subprocess.CalledProcessError: if ``lxc stop`` exits with a non-zero
            status.
    """
    # Build the argument list directly rather than splitting a formatted
    # string, so the machine name always reaches ``lxc`` as one argument.
    subprocess.check_call(["lxc", "stop", machine_name])


### Jubilant helpers ported from MySQL


def execute_queries_on_unit(
    unit_address: str, username: str, password: str, queries: list[str], database: str
) -> list:
    """Execute given PostgreSQL queries on a unit.

    All queries run on a single connection, in order. Each query's result set
    is fetched and flattened (rows are chained into one flat list of column
    values); only the result of the last query is returned.

    Args:
        unit_address: The public IP address of the unit to execute the queries on
        username: The PostgreSQL username
        password: The PostgreSQL password
        queries: A list of queries to execute
        database: Database to execute in

    Returns:
        The flattened values fetched by the last query, or an empty list when
        ``queries`` is empty.
    """
    # Default result so that an empty ``queries`` list does not leave the
    # return value unbound.
    output: list = []

    # Keyword arguments avoid hand-quoting values inside a DSN string, which
    # breaks when e.g. the password contains a quote character.
    connection = psycopg2.connect(
        dbname=database,
        user=username,
        host=unit_address,
        password=password,
        connect_timeout=10,
    )
    try:
        # ``with connection`` only ends the transaction (commit/rollback);
        # it does not close the connection, hence the explicit ``finally``.
        with connection, connection.cursor() as cursor:
            for query in queries:
                cursor.execute(query)
                output = list(itertools.chain.from_iterable(cursor.fetchall()))
    finally:
        connection.close()

    return output
Empty file.
37 changes: 37 additions & 0 deletions tests/integration/high_availability/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd.
# See LICENSE file for licensing details.

import logging

import pytest
from tenacity import Retrying, stop_after_attempt

from .high_availability_helpers_new import get_app_leader

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Name of the application whose leader unit runs the continuous-writes
# actions used by the ``continuous_writes`` fixture.
DB_TEST_APP_NAME = "postgresql-test-app"


@pytest.fixture()
def continuous_writes(juju):
    """Start continuous writes to the PostgreSQL cluster for a test and clear them at the end.

    The actions are run on the leader unit of ``DB_TEST_APP_NAME``. Any
    previous writes are cleared first, then ``start-continuous-writes`` is
    attempted up to 10 times. After the test, the writes are cleared again.
    """
    application_unit = get_app_leader(juju, DB_TEST_APP_NAME)

    logger.info("Clearing continuous writes")
    juju.run(unit=application_unit, action="clear-continuous-writes", wait=120).raise_on_failure()

    logger.info("Starting continuous writes")

    # Retry the start action; the last failure is re-raised once the attempts
    # are exhausted.
    for attempt in Retrying(stop=stop_after_attempt(10), reraise=True):
        with attempt:
            result = juju.run(unit=application_unit, action="start-continuous-writes")
            result.raise_on_failure()

    # The action reports its outcome as the string "True".
    assert result.results["result"] == "True"

    yield

    logger.info("Clearing continuous writes")
    juju.run(unit=application_unit, action="clear-continuous-writes", wait=120).raise_on_failure()
140 changes: 140 additions & 0 deletions tests/integration/high_availability/high_availability_helpers_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
# Copyright 2025 Canonical Ltd.
# See LICENSE file for licensing details.
from collections.abc import Callable

import jubilant
from jubilant import Juju
from jubilant.statustypes import Status, UnitStatus
from tenacity import Retrying, stop_after_delay, wait_fixed

from constants import PEER

from ..helpers import execute_queries_on_unit

# Seconds per minute; used below to express timeouts as multiples of a minute.
MINUTE_SECS = 60
# Database user whose password is looked up and used to connect when reading
# the continuous-writes values.
SERVER_CONFIG_USERNAME = "operator"

# Predicate over a whole-model status, as returned by ``wait_for_apps_status``.
JujuModelStatusFn = Callable[[Status], bool]
# Predicate over a model status plus application name(s).
# NOTE(review): ``wait_for_apps_status`` calls it with ``*apps``, i.e. possibly
# several names, while this alias declares a single ``str`` - confirm.
JujuAppsStatusFn = Callable[[Status, str], bool]


def check_db_units_writes_increment(
    juju: Juju,
    app_name: str,
    app_units: list[str] | None = None,
    db_name: str = "postgresql_test_app_database",
) -> None:
    """Assert that the continuous-writes counter keeps growing on every unit.

    The baseline is the max written value read from the current primary.
    Each unit is then polled (every 10 seconds, for up to 5 minutes) until
    its max written value exceeds the baseline; that value becomes the new
    baseline for the next unit.

    Args:
        juju: The Juju model.
        app_name: The database application name.
        app_units: Units to check; defaults to all units of ``app_name``.
        db_name: The database to connect to.
    """
    units = app_units or get_app_units(juju, app_name)

    primary_unit = get_db_primary_unit(juju, app_name)
    highest_seen = get_db_max_written_value(juju, app_name, primary_unit, db_name)

    for unit in units:
        retryer = Retrying(
            reraise=True,
            stop=stop_after_delay(5 * MINUTE_SECS),
            wait=wait_fixed(10),
        )
        for attempt in retryer:
            with attempt:
                current = get_db_max_written_value(juju, app_name, unit, db_name)
                assert current > highest_seen, "Writes not incrementing"
                highest_seen = current


def get_app_leader(juju: Juju, app_name: str) -> str:
    """Return the name of the unit of ``app_name`` that is flagged as leader.

    Raises:
        Exception: if none of the application's units is the leader.
    """
    units = juju.status().apps[app_name].units
    leader = next((unit_name for unit_name, unit in units.items() if unit.leader), None)
    if leader is None:
        raise Exception("No leader unit found")
    return leader


def get_app_units(juju: Juju, app_name: str) -> dict[str, UnitStatus]:
    """Return the unit-name to unit-status mapping of ``app_name`` from the model status."""
    return juju.status().apps[app_name].units


def get_unit_ip(juju: Juju, app_name: str, unit_name: str) -> str:
    """Return the public address of ``unit_name`` as reported in the model status.

    Raises:
        Exception: if ``app_name`` has no unit called ``unit_name``.
    """
    units = juju.status().apps[app_name].units
    if unit_name not in units:
        raise Exception("No application unit found")
    return units[unit_name].public_address


def get_db_primary_unit(juju: Juju, app_name: str) -> str:
    """Get the current primary node of the cluster.

    Runs the ``get-primary`` action on the application's leader unit (waiting
    up to 5 minutes) and returns the ``primary`` entry of its results.

    Raises:
        Exception: if the action reports no primary, i.e. the ``primary``
            result is missing, empty, or the literal string ``"None"``.
    """
    postgresql_primary = get_app_leader(juju, app_name)
    task = juju.run(unit=postgresql_primary, action="get-primary", wait=5 * MINUTE_SECS)
    task.raise_on_failure()

    primary = task.results.get("primary")
    # A missing key yields ``None``, which must not be returned as a unit name;
    # the action itself signals "no primary" with the string "None".
    if primary and primary != "None":
        return primary

    raise Exception("No primary node found")


def get_db_max_written_value(
    juju: Juju, app_name: str, unit_name: str, db_name: str = "postgresql_test_app_database"
) -> int:
    """Read the highest ``number`` stored in ``continuous_writes`` on a unit.

    Connects to the unit as ``SERVER_CONFIG_USERNAME`` and returns the first
    value produced by ``SELECT MAX(number) FROM continuous_writes;``.

    Args:
        juju: The Juju model.
        app_name: The application name.
        unit_name: The unit name.
        db_name: The database to connect to.
    """
    operator_password = get_user_password(juju, app_name, SERVER_CONFIG_USERNAME)
    unit_address = get_unit_ip(juju, app_name, unit_name)
    max_query = "SELECT MAX(number) FROM continuous_writes;"

    values = execute_queries_on_unit(
        unit_address,
        SERVER_CONFIG_USERNAME,
        operator_password,
        [max_query],
        db_name,
    )
    return values[0]


def wait_for_apps_status(jubilant_status_func: JujuAppsStatusFn, *apps: str) -> JujuModelStatusFn:
    """Build a model-status predicate for the given applications.

    The returned callable is true only when the agents of ``apps`` are idle
    (per ``jubilant.all_agents_idle``) and ``jubilant_status_func`` also holds
    for ``apps``.

    Args:
        jubilant_status_func: The Juju apps status function to wait for.
        apps: The applications to wait for.

    Returns:
        Juju model status function.
    """

    def _apps_ready(status: Status) -> bool:
        # Both checks are always evaluated, then combined.
        agents_idle = jubilant.all_agents_idle(status, *apps)
        status_reached = jubilant_status_func(status, *apps)
        return bool(agents_idle and status_reached)

    return _apps_ready


# PostgreSQL helpers


def get_user_password(juju: Juju, app_name: str, user: str) -> str | None:
    """Get a system user's password.

    Searches the secrets returned by ``juju.secrets()`` for the one labelled
    ``{PEER}.{app_name}.app``, reveals it and reads its ``{user}-password``
    entry.

    Args:
        juju: The Juju model.
        app_name: The application name.
        user: The user whose password is requested.

    Returns:
        The password, or ``None`` when no secret carries the expected label or
        the secret has no entry for ``user``.
    """
    peer_secret_label = f"{PEER}.{app_name}.app"
    for secret in juju.secrets():
        if secret.label == peer_secret_label:
            revealed_secret = juju.show_secret(secret.uri, reveal=True)
            return revealed_secret.content.get(f"{user}-password")
    return None
Loading
Loading