Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,27 @@ jobs:
INTEGRATION_PASSWORD: "${{ matrix.password }}"
INTEGRATION_DATABASE: test
run: uv run pytest -m integration --no-cov

# SQLite is file-based and needs no service container, so it runs as its
# own lean job rather than a matrix entry.
integration-sqlite:
name: "integration / sqlite"
runs-on: ubuntu-latest
permissions:
contents: read

steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3

- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0

- name: Install dependencies
run: uv sync

- name: Run integration tests
env:
INTEGRATION_DB: sqlite
INTEGRATION_DATABASE: "${{ runner.temp }}/integration.db"
run: uv run pytest -m integration --no-cov
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Easily create database dumps with support for redacting data (And replacing that
* MySQL
* PostgreSQL
* Microsoft SQL Server
* SQLite

_More coming soon..._

Expand Down Expand Up @@ -253,6 +254,15 @@ redact:
`connection.driver`. The server's TLS certificate is trusted implicitly
(`TrustServerCertificate=yes`), matching the self-signed certificate SQL
Server ships with.
* `sqlite`: SQLite via `aiosqlite`. There is no server: `connection.database`
is the path to the database file, and host, port and credentials are
neither needed nor used.

````yaml
connection:
type: sqlite
database: ./app.db
````

`connection.schema` selects the schema to dump. It defaults to `public` on
PostgreSQL, `dbo` on SQL Server and the connection database on MySQL. When a
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"aioodbc>=0.5.0",
"aiofiles>=25.1",
"faker>=40.0",
"aiosqlite>=0.22.1",
]

[project.scripts]
Expand Down
22 changes: 12 additions & 10 deletions redactdump/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,18 @@ def __init__(

self.config = Config(config_path).load_config()

if "username" not in self.config["connection"]:
if user is None:
self.console.print("[red]Connection username is required, either via config or arguments[/red]")
sys.exit(1)
self.config["connection"]["username"] = user
if "password" not in self.config["connection"]:
if password is None:
self.console.print("[red]Connection password is required, either via config or arguments[/red]")
sys.exit(1)
self.config["connection"]["password"] = password
# SQLite opens a file directly and takes no credentials.
if self.config["connection"]["type"] != "sqlite":
if "username" not in self.config["connection"]:
if user is None:
self.console.print("[red]Connection username is required, either via config or arguments[/red]")
sys.exit(1)
self.config["connection"]["username"] = user
if "password" not in self.config["connection"]:
if password is None:
self.console.print("[red]Connection password is required, either via config or arguments[/red]")
sys.exit(1)
self.config["connection"]["password"] = password

self.database = Database(self.config, self.console)
# A dry run must leave the filesystem untouched; constructing File
Expand Down
15 changes: 13 additions & 2 deletions redactdump/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ class ConnectionConfig(StrictModel):
"""Database connection settings."""

type: str
host: str
port: int
host: Optional[str] = None
port: Optional[int] = None
database: str
username: Optional[str] = None
password: Optional[str] = None
Expand All @@ -46,6 +46,17 @@ class ConnectionConfig(StrictModel):
# alias; the config file key is still connection.schema.
db_schema: Optional[str] = Field(default=None, alias="schema")

@model_validator(mode="after")
def require_server_fields(self) -> "ConnectionConfig":
    """Reject a server connection that lacks an address.

    A SQLite connection is described by connection.database alone (the
    file to open), so host and port stay optional for it. Every other
    connection type must supply both.

    Returns:
        ConnectionConfig: The validated model, unchanged.

    Raises:
        ValueError: If the type is not sqlite and host or port is unset.
    """
    # SQLite has no server address to validate.
    if self.type == "sqlite":
        return self

    address_incomplete = self.host is None or self.port is None
    if address_incomplete:
        raise ValueError("host and port are required unless connection.type is sqlite")
    return self


class TableLimit(StrictModel):
"""A per-table row cap and row filter."""
Expand Down
116 changes: 106 additions & 10 deletions redactdump/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,23 +162,30 @@ def __init__(self, config: dict, console: Console) -> None:
# certificate SQL Server ships with, so trust it explicitly.
driver = self.config["connection"].get("driver") or MSSQL_DEFAULT_DRIVER
query = {"driver": driver, "TrustServerCertificate": "yes"}
elif self.config["connection"]["type"] == "sqlite":
drivername = "sqlite+aiosqlite"
else:
raise RedactDumpError(
f"Unsupported database engine '{self.config['connection']['type']}'. "
"Supported types: pgsql, postgresql, mysql, mssql."
"Supported types: pgsql, postgresql, mysql, mssql, sqlite."
)

# URL.create escapes every component, so credentials containing
# reserved characters (@, /, :, #) survive the round trip.
url = URL.create(
drivername,
username=self.config["connection"]["username"],
password=self.config["connection"]["password"],
host=self.config["connection"]["host"],
port=self.config["connection"]["port"],
database=self.config["connection"]["database"],
query=query,
)
if drivername == "sqlite+aiosqlite":
# SQLite opens the file named by connection.database; there is
# no server, so the URL carries no credentials, host or port.
url = URL.create(drivername, database=self.config["connection"]["database"])
else:
url = URL.create(
drivername,
username=self.config["connection"]["username"],
password=self.config["connection"]["password"],
host=self.config["connection"]["host"],
port=self.config["connection"]["port"],
database=self.config["connection"]["database"],
query=query,
)
self.engine: AsyncEngine = create_async_engine(url, echo=False)

async def dispose(self) -> None:
Expand Down Expand Up @@ -249,6 +256,8 @@ async def get_tables(self) -> List[Table]:
Returns:
List[str]: A list of tables.
"""
if self.engine.dialect.name == "sqlite":
return await self.sqlite_tables()
if self.configured_schema:
schema = self.configured_schema
elif self.engine.dialect.name == "mysql":
Expand Down Expand Up @@ -306,6 +315,93 @@ async def get_tables(self) -> List[Table]:
tables.append(table_obj)
return tables

@staticmethod
def quote_sqlite(name: str) -> str:
"""Quote an identifier for SQLite.

PRAGMA statements cannot bind parameters, so table names read from
sqlite_master are interpolated; quoting keeps any name valid.

Args:
name (str): The identifier.
"""
escaped = name.replace('"', '""')
return f'"{escaped}"'

async def sqlite_tables(self) -> List[Table]:
    """List tables and their columns for a SQLite database.

    SQLite has no information_schema: sqlite_master lists the tables and
    PRAGMA table_info supplies each table's columns, nullability,
    defaults and primary key positions. Internal sqlite_* tables are
    skipped.

    The prefix filter escapes the underscore: in a LIKE pattern a bare
    "_" matches any single character, so an unescaped 'sqlite_%' would
    also hide user tables such as "sqlitecache".

    Returns:
        List[Table]: The tables to dump.
    """
    tables: List[Table] = []
    # Loop-invariant: an empty selection means "keep every column".
    selected_columns = self.config["limits"]["select_columns"]

    async with self.engine.connect() as conn:
        async with conn.begin():
            result = await conn.execute(
                text(
                    "SELECT name FROM sqlite_master "
                    r"WHERE type = 'table' AND name NOT LIKE 'sqlite\_%' ESCAPE '\'"
                )
            )

            for row in result:
                name = row[0]
                if not self.table_selected(name):
                    if self.config["debug"]["enabled"]:
                        self.console.print(f"[cyan]DEBUG: Skipping table (table filters): {name}[/cyan]")
                    continue

                # PRAGMA takes no bound parameters, so the quoted name is interpolated.
                columns_result = await conn.execute(text(f"PRAGMA table_info({self.quote_sqlite(name)})"))
                table_columns = []
                key_positions: List[Tuple[int, str]] = []
                for column in columns_result:
                    # pk is the column's 1-based position in the primary key
                    # (0 when it is not part of the key). Key columns are
                    # recorded even when the column itself is filtered out.
                    if column.pk:
                        key_positions.append((column.pk, column.name))
                    if not selected_columns or column.name in selected_columns:
                        table_columns.append(
                            TableColumn(column.name, column.type, not column.notnull, column.dflt_value)
                        )

                table_obj = Table(name, table_columns)
                table_obj.primary_key = [column for _position, column in sorted(key_positions)]
                if self.config["output"].get("ddl"):
                    table_obj.ddl = await self.build_sqlite_ddl(conn, name)
                tables.append(table_obj)
    return tables

async def build_sqlite_ddl(self, conn: Any, table_name: str) -> str:
    """Assemble the DDL script for one SQLite table.

    The CREATE TABLE text is read back from sqlite_master as stored,
    then followed by the CREATE INDEX statement of every index row for
    the table that carries SQL text. Each statement is terminated with
    a semicolon and the statements are separated by newlines.

    Args:
        conn (AsyncConnection): An open read connection.
        table_name (str): Name of the table.

    Returns:
        str: The CREATE TABLE statement, plus any secondary indexes.
    """
    table_rows = list(
        await conn.execute(
            text("SELECT sql FROM sqlite_master WHERE type = 'table' AND name = :table"),
            {"table": table_name},
        )
    )
    # The first row's only column holds the stored CREATE TABLE text.
    statements = [f"{table_rows[0][0]};"]

    index_rows = await conn.execute(
        text("SELECT sql FROM sqlite_master WHERE type = 'index' AND tbl_name = :table AND sql IS NOT NULL"),
        {"table": table_name},
    )
    statements.extend(f"{index_row[0]};" for index_row in index_rows)
    return "\n".join(statements)

async def get_primary_key(self, conn: Any, table_name: str, schema: str) -> List[str]:
"""Return the table's primary key column names, in key order.

Expand Down
46 changes: 45 additions & 1 deletion redactdump/core/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,28 @@
}
)

DIALECTS = {"mysql": "mysql", "mssql": "mssql"}
# A SQLite column reports whatever type text its table declared (for example
# "DECIMAL(10,5)"), so callers strip it down to the lower-cased base name
# before looking it up here. As with the other engines, membership only
# matters for a numeric column that carries a string value.
SQLITE_NUMERIC_TYPES = frozenset(
    (
        # Integer spellings.
        "int",
        "integer",
        "tinyint",
        "smallint",
        "mediumint",
        "bigint",
        # Floating-point spellings.
        "real",
        "double",
        "double precision",
        "float",
        # Fixed-point spellings.
        "numeric",
        "decimal",
    )
)

DIALECTS = {
    "mysql": "mysql",
    "mssql": "mssql",
    "sqlite": "sqlite",
}


class File:
Expand Down Expand Up @@ -207,6 +228,8 @@ def format_value(column: TableColumn, dialect: str = "postgresql") -> str:
return File._format_value_mysql(value, data_type)
if dialect == "mssql":
return File._format_value_mssql(value, data_type)
if dialect == "sqlite":
return File._format_value_sqlite(value, data_type)
return File._format_value_postgres(value, data_type)

@staticmethod
Expand Down Expand Up @@ -283,6 +306,27 @@ def _format_value_mysql(value: object, data_type: str) -> str:
literal = text.replace("\\", "\\\\").replace("'", "''")
return f"'{literal}'"

@staticmethod
def _format_value_sqlite(value: object, data_type: str) -> str:
"""Render a value as a SQLite literal, driven by the Python type.

SQLite has no boolean type (1/0 by convention), binary becomes an
X'..' hex literal and dict JSON is serialised to text. Backslashes
are literal in SQLite strings, so only single quotes are escaped.
"""
if isinstance(value, bool):
return "1" if value else "0"
if isinstance(value, (int, float, Decimal)):
return str(value)
base_type = data_type.split("(")[0].strip().lower() if data_type else ""
if base_type in SQLITE_NUMERIC_TYPES:
return str(value)
if isinstance(value, (bytes, bytearray, memoryview)):
return f"X'{bytes(value).hex()}'"
text = json.dumps(value) if isinstance(value, dict) else str(value)
literal = text.replace("'", "''")
return f"'{literal}'"

@staticmethod
def _format_value_mssql(value: object, data_type: str) -> str:
"""Render a value as a SQL Server literal, driven by the Python type.
Expand Down
12 changes: 12 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ async def dispose(self) -> None:

def resolve(self, sql: str, params: Optional[Dict[str, Any]]) -> List[FakeRow]:
"""Return rows for a statement based on its SQL text."""
if "PRAGMA table_info" in sql:
match = re.search(r'PRAGMA table_info\("(.+)"\)', sql)
name = match.group(1).replace('""', '"') if match else ""
return [FakeRow(column) for column in self.schema.get(name, [])]
if "FROM sqlite_master" in sql:
if "type = 'index'" in sql:
table_name = (params or {}).get("table", "")
return [FakeRow({"sql": statement}) for statement in self.ddl_indexes.get(table_name, [])]
if "name = :table" in sql:
table_name = (params or {}).get("table", "")
return [FakeRow({"sql": self.create_statements.get(table_name, "")})]
return [FakeRow({"name": name}) for name in self.schema]
if "CHARACTER_MAXIMUM_LENGTH" in sql:
table_name = (params or {}).get("table", "")
return [FakeRow(column) for column in self.ddl_columns.get(table_name, [])]
Expand Down
18 changes: 18 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,24 @@ def test_init_credentials_from_args(tmp_path: Path, monkeypatch: pytest.MonkeyPa
assert app.config["connection"]["password"] == "pw"


def test_init_sqlite_needs_no_credentials(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """RedactDump starts for SQLite with neither a username nor a password supplied."""
    config_file = tmp_path / "config.yaml"
    config_file.write_text(
        yaml.safe_dump(
            {
                "connection": {"type": "sqlite", "database": str(tmp_path / "app.db")},
                "redact": {"patterns": {"data": []}},
                "output": {"type": "file", "location": str(tmp_path / "dump")},
            }
        )
    )
    # Database and File are replaced with mocks for this test.
    monkeypatch.setattr("redactdump.app.Database", MagicMock())
    monkeypatch.setattr("redactdump.app.File", MagicMock())

    connection = RedactDump(str(config_file), None, None).config["connection"]

    assert "username" not in connection
    assert "password" not in connection


def test_init_dry_run_skips_file_creation(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
"""A dry run never constructs the file writer, leaving the filesystem untouched."""
config_path = write_config_file(tmp_path, include_credentials=True)
Expand Down
17 changes: 17 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,23 @@ def test_non_string_connection_schema_rejected(tmp_path: Path) -> None:
load(tmp_path, data)


def test_sqlite_connection_without_host_port_accepted(tmp_path: Path) -> None:
    """A connection block holding only type and database loads for SQLite."""
    config = base_config()
    config["connection"] = {"type": "sqlite", "database": "./app.db"}

    connection = load(tmp_path, config)["connection"]

    assert connection["type"] == "sqlite"
    assert connection["database"] == "./app.db"


def test_server_connection_requires_host_and_port(tmp_path: Path) -> None:
    """Loading a pgsql connection that omits host and port is rejected."""
    config = base_config()
    config["connection"] = {"type": "pgsql", "database": "test"}

    with pytest.raises(RedactDumpError, match="host and port are required"):
        load(tmp_path, config)


def test_pattern_replacement_none_allowed(tmp_path: Path) -> None:
"""A null replacement in a pattern is permitted by the schema."""
data = base_config()
Expand Down
Loading
Loading