From f711f0c1f8934b6c7d7de2fb5efdf99dfc32ff0b Mon Sep 17 00:00:00 2001
From: Andrew Lukoshko
Date: Tue, 7 Apr 2026 14:00:51 +0200
Subject: [PATCH] feat: optimize build list query and add database indexes for search

The build list endpoint (GET /builds/) has several performance issues
that make the frontend slow, especially when filtering by project name,
ref, or RPM parameters.

Query optimizations in get_builds():

- Make the BuildTaskArtifact LEFT OUTER JOIN conditional: only applied
  when RPM filter params (name, epoch, version, release, arch) are
  provided. Previously every request paid the cost of this JOIN plus a
  DISTINCT to deduplicate the multiplied rows.

- Move the Pulp API call (get_rpm_packages) outside of generate_query()
  so it executes once instead of twice when paginating (data query +
  count query both called generate_query independently).

- Reduce eager loading for paginated list queries: skip linked_builds,
  test_tasks.performance_stats, build_task.performance_stats, and
  sign_tasks which are not needed for the list view. Single build
  detail view still loads all relationships.

Database indexes (Alembic migration):

- GIN trigram indexes (pg_trgm) on build_task_refs.url and git_ref to
  accelerate LIKE '%pattern%' queries used for project and ref search.
  Regular B-tree indexes cannot help with infix LIKE patterns.

- B-tree indexes on builds (owner_id, released, signed, finished_at),
  build_tasks.platform_id, and build_artifacts.href for commonly used
  WHERE filters.

- GIN trigram indexes on new_errata_records title and original_title
  for errata title search.

- B-tree indexes on new_errata_records (platform_id, release_status,
  issued_date) and a GIN trigram + B-tree index on
  new_errata_references.cve_id for CVE search.

Note: the migration requires the pg_trgm PostgreSQL extension which is
created automatically if not already present.
---
Reviewer note: DISTINCT ON (builds.id) stays on every build list query,
because the tasks/refs joins still multiply rows; only the build_artifacts
join is conditional. The blob ids on the "index" lines predate this revision.

 .../f1a2b3c4d5e6_add_search_indexes.py        | 147 ++++++++++++++++++
 alws/crud/build.py                            |  98 +++++++-----
 2 files changed, 205 insertions(+), 40 deletions(-)
 create mode 100644 alws/alembic/versions/f1a2b3c4d5e6_add_search_indexes.py

diff --git a/alws/alembic/versions/f1a2b3c4d5e6_add_search_indexes.py b/alws/alembic/versions/f1a2b3c4d5e6_add_search_indexes.py
new file mode 100644
index 00000000..e7ec1b87
--- /dev/null
+++ b/alws/alembic/versions/f1a2b3c4d5e6_add_search_indexes.py
@@ -0,0 +1,147 @@
+"""Add indexes for search performance
+
+Revision ID: f1a2b3c4d5e6
+Revises: e9bb2a44defb
+Create Date: 2026-04-07 00:00:00.000000
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "f1a2b3c4d5e6"
+down_revision = "e9bb2a44defb"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Create the pg_trgm extension and the search/filter indexes."""
+    # Enable pg_trgm extension for GIN trigram indexes
+    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm")
+
+    # BuildTaskRef: url and git_ref are used in LIKE queries for
+    # project/ref search (crud/build.py:176,185-186).
+    # pg_trgm GIN indexes support LIKE '%pattern%' efficiently.
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS idx_build_task_refs_url_trgm "
+        "ON build_task_refs USING gin (url gin_trgm_ops)"
+    )
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS idx_build_task_refs_git_ref_trgm "
+        "ON build_task_refs USING gin (git_ref gin_trgm_ops)"
+    )
+
+    # BuildTaskArtifact.href: used in IN() clause for RPM search
+    # (crud/build.py:204)
+    op.create_index(
+        "idx_build_artifacts_href",
+        "build_artifacts",
+        ["href"],
+        unique=False,
+    )
+
+    # Build columns used in WHERE filters (crud/build.py:180,209,211,214)
+    op.create_index(
+        "ix_builds_owner_id",
+        "builds",
+        ["owner_id"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_builds_released",
+        "builds",
+        ["released"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_builds_signed",
+        "builds",
+        ["signed"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_builds_finished_at",
+        "builds",
+        ["finished_at"],
+        unique=False,
+    )
+
+    # BuildTask.platform_id: used in filter (crud/build.py:190)
+    op.create_index(
+        "ix_build_tasks_platform_id",
+        "build_tasks",
+        ["platform_id"],
+        unique=False,
+    )
+
+    # NewErrataRecord: title fields searched with LIKE
+    # (crud/errata.py:1298-1299)
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS idx_errata_records_title_trgm "
+        "ON new_errata_records USING gin (title gin_trgm_ops)"
+    )
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS idx_errata_records_original_title_trgm "
+        "ON new_errata_records USING gin (original_title gin_trgm_ops)"
+    )
+
+    # NewErrataRecord.platform_id: used in filter (crud/errata.py:1303)
+    op.create_index(
+        "ix_new_errata_records_platform_id",
+        "new_errata_records",
+        ["platform_id"],
+        unique=False,
+    )
+
+    # NewErrataRecord.release_status: used in filter (crud/errata.py:1310)
+    op.create_index(
+        "ix_new_errata_records_release_status",
+        "new_errata_records",
+        ["release_status"],
+        unique=False,
+    )
+
+    # NewErrataRecord.issued_date: used in ORDER BY (crud/errata.py:1314)
+    op.create_index(
+        "ix_new_errata_records_issued_date",
+        "new_errata_records",
+        ["issued_date"],
+        unique=False,
+    )
+
+    # NewErrataReference.cve_id: NewErrataRecord.cves is an
+    # association_proxy through this column. The LIKE query in
+    # crud/errata.py:1306 resolves to a subquery on cve_id.
+    op.execute(
+        "CREATE INDEX IF NOT EXISTS idx_new_errata_references_cve_id_trgm "
+        "ON new_errata_references USING gin (cve_id gin_trgm_ops)"
+    )
+    op.create_index(
+        "ix_new_errata_references_cve_id",
+        "new_errata_references",
+        ["cve_id"],
+        unique=False,
+    )
+
+
+def downgrade():
+    """Drop every index created by upgrade(); pg_trgm stays installed."""
+    op.drop_index(
+        "ix_new_errata_references_cve_id",
+        table_name="new_errata_references",
+    )
+    op.execute("DROP INDEX IF EXISTS idx_new_errata_references_cve_id_trgm")
+    op.drop_index("ix_new_errata_records_issued_date", table_name="new_errata_records")
+    op.drop_index("ix_new_errata_records_release_status", table_name="new_errata_records")
+    op.drop_index("ix_new_errata_records_platform_id", table_name="new_errata_records")
+    op.execute("DROP INDEX IF EXISTS idx_errata_records_original_title_trgm")
+    op.execute("DROP INDEX IF EXISTS idx_errata_records_title_trgm")
+    op.drop_index("ix_build_tasks_platform_id", table_name="build_tasks")
+    op.drop_index("ix_builds_finished_at", table_name="builds")
+    op.drop_index("ix_builds_signed", table_name="builds")
+    op.drop_index("ix_builds_released", table_name="builds")
+    op.drop_index("ix_builds_owner_id", table_name="builds")
+    op.drop_index("idx_build_artifacts_href", table_name="build_artifacts")
+    op.execute("DROP INDEX IF EXISTS idx_build_task_refs_git_ref_trgm")
+    op.execute("DROP INDEX IF EXISTS idx_build_task_refs_url_trgm")
diff --git a/alws/crud/build.py b/alws/crud/build.py
index 35ace1b8..56879a85 100644
--- a/alws/crud/build.py
+++ b/alws/crud/build.py
@@ -127,31 +127,41 @@ async def get_builds(
         "arch": rpm_arch,
     }
 
-    async def generate_query(count=False):
-        query = (
-            select(models.Build)
-            .join(
-                models.Build.tasks,
-            )
-            .join(
-                models.BuildTask.ref,
-            )
-            .join(
-                models.BuildTask.artifacts,
-                isouter=True,
-            )
-            .order_by(models.Build.id.desc())
-            .options(
-                selectinload(models.Build.tasks).selectinload(
-                    models.BuildTask.platform
-                ),
-                selectinload(models.Build.tasks).selectinload(
-                    models.BuildTask.ref
-                ),
-                selectinload(models.Build.owner),
-                selectinload(models.Build.tasks).selectinload(
-                    models.BuildTask.artifacts
-                ),
+    # Resolve the RPM filter against Pulp once, up front, so the data and
+    # count queries built by generate_query() reuse the same hrefs.
+    pulp_hrefs = None
+    has_rpm_filter = any(rpm_params.values())
+    if has_rpm_filter:
+        pulp_params.update({
+            key: value
+            for key, value in rpm_params.items()
+            if value is not None
+        })
+        pulp_hrefs = await pulp_client.get_rpm_packages(**pulp_params)
+        pulp_hrefs = [row["pulp_href"] for row in pulp_hrefs]
+
+    async def generate_query(count=False, minimal=False):
+        # minimal=True skips the eager loads that the paginated list
+        # view does not use.
+        load_options = [
+            selectinload(models.Build.tasks).selectinload(
+                models.BuildTask.platform
+            ),
+            selectinload(models.Build.tasks).selectinload(
+                models.BuildTask.ref
+            ),
+            selectinload(models.Build.owner),
+            selectinload(models.Build.tasks).selectinload(
+                models.BuildTask.artifacts
+            ),
+            selectinload(models.Build.tasks).selectinload(
+                models.BuildTask.rpm_modules
+            ),
+            selectinload(models.Build.platform_flavors),
+            selectinload(models.Build.products),
+        ]
+        if not minimal:
+            load_options.extend([
                 selectinload(models.Build.linked_builds),
                 selectinload(models.Build.tasks)
                 .selectinload(models.BuildTask.test_tasks)
@@ -160,15 +170,29 @@ async def generate_query(count=False):
                     models.BuildTask.performance_stats
                 ),
                 selectinload(models.Build.sign_tasks),
-                selectinload(models.Build.tasks).selectinload(
-                    models.BuildTask.rpm_modules
-                ),
-                selectinload(models.Build.platform_flavors),
-                selectinload(models.Build.products),
+            ])
+        query = (
+            select(models.Build)
+            .join(
+                models.Build.tasks,
+            )
+            .join(
+                models.BuildTask.ref,
             )
+            .order_by(models.Build.id.desc())
+            .options(*load_options)
+            # The tasks/refs joins return one row per build task, so
+            # DISTINCT ON (builds.id) must stay on every request, not
+            # only when the artifacts join is added.
             .distinct(models.Build.id)
         )
 
+        if has_rpm_filter:
+            query = query.join(
+                models.BuildTask.artifacts,
+                isouter=True,
+            )
+
         if build_id is not None:
             query = query.where(models.Build.id == build_id)
         if project is not None:
@@ -190,15 +214,7 @@ async def generate_query(count=False):
             query = query.filter(models.BuildTask.platform_id == platform_id)
         if build_task_arch is not None:
             query = query.filter(models.BuildTask.arch == build_task_arch)
-        if any(rpm_params.values()):
-            pulp_params.update({
-                key: value
-                for key, value in rpm_params.items()
-                if value is not None
-            })
-            # TODO: we can get packages from pulp database
-            pulp_hrefs = await pulp_client.get_rpm_packages(**pulp_params)
-            pulp_hrefs = [row["pulp_href"] for row in pulp_hrefs]
+        if has_rpm_filter:
             query = query.filter(
                 sqlalchemy.and_(
                     models.BuildTaskArtifact.href.in_(pulp_hrefs),
@@ -227,7 +243,9 @@ async def generate_query(count=False):
     if page_number:
         return {
             "builds": (
-                (await db.execute(await generate_query())).scalars().all()
+                (await db.execute(await generate_query(minimal=True)))
+                .scalars()
+                .all()
             ),
             "total_builds": (
                 await db.execute(await generate_query(count=True))