1- .PHONY : lint lint-check format test lint-test test-coverage-compare db-export db-import db-sync gbl-admin-db-download gbl-admin-db-unzip gbl-admin-db-restore gbl-admin-db-sync gbl-admin-db-add-latest-btaa-fields gbl-admin-db-import-resources populate-distributions populate-data-dictionaries gbl-admin-db-import-all reindex es-unblock populate-relationships verify-h3-index kamal-reindex kamal-verify-h3-index clear_cache frontend-reset
1+ .PHONY : lint lint-check format test lint-test test-coverage-compare db-export db-import db-sync gbl-admin-db-download gbl-admin-db-unzip gbl-admin-db-restore gbl-admin-db-sync gbl-admin-db-add-latest-btaa-fields gbl-admin-db-import-resources populate-distributions populate-data-dictionaries gbl-admin-db-import-all reindex reindex-benchmark local-clear-search-cache es-unblock populate-relationships verify-h3-index kamal-reindex kamal-verify-h3-index kamal-clear-cache clear_cache frontend-reset ogm-refresh ogm-refresh-all ogm-refresh-repo ogm-status ogm-status-watch ogm-failures
22
33# Load environment variables from .env file if it exists
44-include .env
@@ -47,6 +47,33 @@ GBL_ADMIN_IMPORT_CONFLICT ?= update
4747GBL_ADMIN_DISTRIBUTIONS_BATCH_SIZE ?= 2000
4848KAMAL_APP_ROLE ?= web
4949KAMAL_PYTHON ?= /opt/venv/bin/python
50+ # Local atomic reindex tuning.
51+ REINDEX_CHUNK_SIZE ?= 2000
52+ REINDEX_BULK_SIZE ?= 2000
53+ REINDEX_BULK_MAX_RETRIES ?= 2
54+ REINDEX_FAST_SETTINGS ?= true
55+ REINDEX_FORCE_REPLICAS_ZERO ?= true
56+ REINDEX_ALLOW_PARTIAL ?= false
57+ REINDEX_PRUNE_OLD ?= true
58+ REINDEX_RETAIN_PREVIOUS ?= 1
59+ REINDEX_REMOVE_LEGACY_INDEX ?= true
60+ REINDEX_BENCHMARK ?= false
61+ # Kamal atomic reindex defaults (versioned index + alias swap + prune).
62+ KAMAL_REINDEX_CHUNK_SIZE ?= 1000
63+ KAMAL_REINDEX_BULK_SIZE ?= 2000
64+ KAMAL_REINDEX_BULK_MAX_RETRIES ?= 2
65+ KAMAL_REINDEX_FAST_SETTINGS ?= true
66+ KAMAL_REINDEX_FORCE_REPLICAS_ZERO ?= true
67+ KAMAL_REINDEX_ALLOW_PARTIAL ?= false
68+ KAMAL_REINDEX_PRUNE_OLD ?= true
69+ KAMAL_REINDEX_RETAIN_PREVIOUS ?= 1
70+ KAMAL_REINDEX_REMOVE_LEGACY_INDEX ?= true
71+ # Optional override for remote API base URL used by kamal-clear-cache.
72+ # If unset, the target falls back to APPLICATION_URL from Kamal env.
73+ KAMAL_API_URL ?=
74+ KAMAL_CACHE_TYPE ?= search
75+ OGM_API_URL ?= http://localhost:8000
76+ OGM_STATUS_POLL_SECONDS ?= 5
5077
5178# Run both linting and formatting checks (without modifying files)
5279lint :
@@ -451,14 +478,110 @@ es-unblock:
451478 -d ' {"index.blocks.read_only_allow_delete": null}' || true
452479 @echo " Done. Run 'make reindex' if you need to reindex."
453480
454- # Reindex all resources into Elasticsearch
481+ # Reindex all resources into Elasticsearch using versioned index + alias swap.
455482reindex :
456- @echo " Reindexing all resources into Elasticsearch (same logic as /admin/reindex) ..."
483+ @echo " Atomic reindex (versioned index + alias swap) in local Docker ..."
457484 @docker compose exec -T api bash -lc ' \
458485 set -e; \
459486 : $$ {ELASTICSEARCH_INDEX:=btaa_geospatial_api}; \
460- echo " Index: $$ ELASTICSEARCH_INDEX" ; \
461- cd /app/backend && python3 scripts/reindex_admin.py'
487+ echo " Alias: $$ ELASTICSEARCH_INDEX" ; \
488+ cd /app/backend && \
489+ REINDEX_ATOMIC_CHUNK_SIZE=$(REINDEX_CHUNK_SIZE ) \
490+ REINDEX_ATOMIC_BULK_SIZE=$(REINDEX_BULK_SIZE ) \
491+ REINDEX_ATOMIC_BULK_MAX_RETRIES=$(REINDEX_BULK_MAX_RETRIES ) \
492+ REINDEX_ATOMIC_FAST_SETTINGS=$(REINDEX_FAST_SETTINGS ) \
493+ REINDEX_ATOMIC_FORCE_REPLICAS_ZERO=$(REINDEX_FORCE_REPLICAS_ZERO ) \
494+ REINDEX_ATOMIC_ALLOW_PARTIAL=$(REINDEX_ALLOW_PARTIAL ) \
495+ REINDEX_ATOMIC_PRUNE_OLD=$(REINDEX_PRUNE_OLD ) \
496+ REINDEX_ATOMIC_RETAIN_PREVIOUS=$(REINDEX_RETAIN_PREVIOUS ) \
497+ REINDEX_ATOMIC_REMOVE_LEGACY_INDEX=$(REINDEX_REMOVE_LEGACY_INDEX ) \
498+ REINDEX_ATOMIC_BENCHMARK=$(REINDEX_BENCHMARK ) \
499+ python3 scripts/reindex_atomic.py'
500+ @echo " Reindex complete; clearing local search cache..."
501+ @$(MAKE ) local-clear-search-cache
502+
503+ # Reindex with benchmark timing output enabled.
504+ reindex-benchmark :
505+ @$(MAKE ) reindex REINDEX_BENCHMARK=true
506+
507+ # Clear local API search cache via admin endpoint.
508+ local-clear-search-cache :
509+ @docker compose exec -T api bash -lc ' ADMIN_USER=$${ADMIN_USERNAME:-admin}; ADMIN_PASS=$${ADMIN_PASSWORD:-changeme}; curl -fsS -u "$$ADMIN_USER:$$ADMIN_PASS" -X POST "http://localhost:8000/api/v1/admin/cache/clear?cache_type=search"'
510+
511+ # Refresh OpenGeoMetadata (OGM) harvest for all enabled weekly repos.
512+ # Uses ADMIN_USERNAME/ADMIN_PASSWORD from env or .env (defaults to admin/changeme).
513+ ogm-refresh : ogm-refresh-all
514+
515+ ogm-refresh-all :
516+ @echo " Triggering OGM harvest for all enabled weekly repos via $( OGM_API_URL) ..."
517+ @docker compose exec -T api bash -lc ' \
518+ ADMIN_USER=$$ {ADMIN_USERNAME:-admin}; \
519+ ADMIN_PASS=$$ {ADMIN_PASSWORD:-changeme}; \
520+ curl -fsS -u " $$ ADMIN_USER:$$ ADMIN_PASS" -X POST \
521+ " $( OGM_API_URL) /api/v1/admin/ogm/harvest" \
522+ -H " Content-Type: application/json" \
523+ -d ' \' ' {"ogm_all":true,"ogm_trigger":"weekly"}' \' ' '
524+ @echo
525+ @echo " OGM harvest request submitted."
526+
527+ # Refresh a single OpenGeoMetadata (OGM) repo.
528+ # Usage: make ogm-refresh-repo OGM_REPO_NAME=edu.stanford.purl
529+ ogm-refresh-repo :
530+ @if [ -z " $( OGM_REPO_NAME) " ]; then \
531+ echo " ERROR: OGM_REPO_NAME is required." ; \
532+ echo " Usage: make ogm-refresh-repo OGM_REPO_NAME=edu.stanford.purl" ; \
533+ exit 1; \
534+ fi
535+ @echo " Triggering OGM harvest for repo $( OGM_REPO_NAME) via $( OGM_API_URL) ..."
536+ @docker compose exec -T api bash -lc ' \
537+ ADMIN_USER=$$ {ADMIN_USERNAME:-admin}; \
538+ ADMIN_PASS=$$ {ADMIN_PASSWORD:-changeme}; \
539+ curl -fsS -u " $$ ADMIN_USER:$$ ADMIN_PASS" -X POST \
540+ " $( OGM_API_URL) /api/v1/admin/ogm/harvest" \
541+ -H " Content-Type: application/json" \
542+ -d " {\" ogm_repo_name\" :\" $( OGM_REPO_NAME) \" ,\" ogm_trigger\" :\" manual\" }" '
543+ @echo
544+ @echo " OGM harvest request submitted for $( OGM_REPO_NAME) ."
545+
546+ # Show OGM harvest status snapshot.
547+ # Usage:
548+ # make ogm-status # list recent runs
549+ # make ogm-status OGM_RUN_ID=<run_id> # show one run detail
550+ ogm-status :
551+ @echo " Fetching OGM harvest status from $( OGM_API_URL) ..."
552+ @docker compose exec -T api bash -lc ' \
553+ ADMIN_USER=$$ {ADMIN_USERNAME:-admin}; \
554+ ADMIN_PASS=$$ {ADMIN_PASSWORD:-changeme}; \
555+ if [ -n " $( OGM_RUN_ID) " ]; then \
556+ curl -fsS -u " $$ ADMIN_USER:$$ ADMIN_PASS" \
557+ " $( OGM_API_URL) /api/v1/admin/ogm/harvest/runs/$( OGM_RUN_ID) " ; \
558+ else \
559+ curl -fsS -u " $$ ADMIN_USER:$$ ADMIN_PASS" \
560+ " $( OGM_API_URL) /api/v1/admin/ogm/harvest/runs" ; \
561+ fi'
562+ @echo
563+
564+ # Continuously poll OGM harvest status.
565+ # Usage:
566+ # make ogm-status-watch
567+ # make ogm-status-watch OGM_RUN_ID=<run_id> OGM_STATUS_POLL_SECONDS=3
568+ ogm-status-watch :
569+ @echo " Watching OGM harvest status (every $( OGM_STATUS_POLL_SECONDS) s). Press Ctrl+C to stop."
570+ @while true ; do \
571+ $(MAKE ) --no-print-directory ogm-status OGM_RUN_ID=" $( OGM_RUN_ID) " ; \
572+ sleep $(OGM_STATUS_POLL_SECONDS ) ; \
573+ done
574+
575+ # Show only failed OGM harvest runs with error details.
576+ # Usage: make ogm-failures
577+ ogm-failures :
578+ @echo " Fetching failed OGM harvest runs from $( OGM_API_URL) ..."
579+ @docker compose exec -T api bash -lc ' \
580+ ADMIN_USER=$$ {ADMIN_USERNAME:-admin}; \
581+ ADMIN_PASS=$$ {ADMIN_PASSWORD:-changeme}; \
582+ curl -fsS -u " $$ ADMIN_USER:$$ ADMIN_PASS" \
583+ " $( OGM_API_URL) /api/v1/admin/ogm/harvest/runs" \
584+ | python -c " import json,sys; data=json.load(sys.stdin); failed=[r for r in data.get(\" runs\" ,[]) if r.get(\" ogm_status\" )==\" failed\" ]; print(\" No failed OGM runs found in current history window.\" ) if not failed else [print(\" ogm_id={0} repo={1} trigger={2} started_at={3}\\ nerror={4}\\ n\" .format(r.get(\" ogm_id\" ), r.get(\" ogm_repo_name\" ), r.get(\" ogm_trigger\" ), r.get(\" ogm_started_at\" ), r.get(\" ogm_error\" ) or \" (none)\" )) for r in failed]" '
462585
463586# Populate resource_relationships from resources table (dct_isPartOf_sm, pcdm_memberOf_sm, etc.).
464587# Run after ingest or when relationship columns change. Search "Has part" filter uses DB + ES;
@@ -494,13 +617,25 @@ verify-h3-index:
494617
495618# Reindex all resources into Elasticsearch on Kamal (single role run by default).
496619kamal-reindex :
497- @echo " Reindexing all resources on Kamal (role: $( KAMAL_APP_ROLE) )..."
620+ @echo " Atomic reindex on Kamal (role: $( KAMAL_APP_ROLE) )..."
498621 @if [ -z " $$ KAMAL_SSH_USER" ] || [ -z " $$ KAMAL_HOST" ]; then \
499622 echo " ERROR: KAMAL_SSH_USER and KAMAL_HOST environment variables must be set." ; \
500623 echo " Please source .kamal/secrets first." ; \
501624 exit 1; \
502625 fi
503- @kamal app exec --roles $(KAMAL_APP_ROLE ) " bash -lc 'cd /app/backend && $( KAMAL_PYTHON) scripts/reindex_admin.py'"
626+ @kamal app exec --roles $(KAMAL_APP_ROLE ) " bash -lc 'cd /app/backend && \
627+ REINDEX_ATOMIC_CHUNK_SIZE=$(KAMAL_REINDEX_CHUNK_SIZE ) \
628+ REINDEX_ATOMIC_BULK_SIZE=$(KAMAL_REINDEX_BULK_SIZE ) \
629+ REINDEX_ATOMIC_BULK_MAX_RETRIES=$(KAMAL_REINDEX_BULK_MAX_RETRIES ) \
630+ REINDEX_ATOMIC_FAST_SETTINGS=$(KAMAL_REINDEX_FAST_SETTINGS ) \
631+ REINDEX_ATOMIC_FORCE_REPLICAS_ZERO=$(KAMAL_REINDEX_FORCE_REPLICAS_ZERO ) \
632+ REINDEX_ATOMIC_ALLOW_PARTIAL=$(KAMAL_REINDEX_ALLOW_PARTIAL ) \
633+ REINDEX_ATOMIC_PRUNE_OLD=$(KAMAL_REINDEX_PRUNE_OLD ) \
634+ REINDEX_ATOMIC_RETAIN_PREVIOUS=$(KAMAL_REINDEX_RETAIN_PREVIOUS ) \
635+ REINDEX_ATOMIC_REMOVE_LEGACY_INDEX=$(KAMAL_REINDEX_REMOVE_LEGACY_INDEX ) \
636+ $(KAMAL_PYTHON ) scripts/reindex_atomic.py' "
637+ @echo " Reindex complete; clearing API cache (cache_type: $( KAMAL_CACHE_TYPE) )..."
638+ @$(MAKE ) kamal-clear-cache
504639
505640# Verify H3 pyramid fields on Kamal (single role run by default).
506641kamal-verify-h3-index :
@@ -512,6 +647,22 @@ kamal-verify-h3-index:
512647 fi
513648 @kamal app exec --roles $(KAMAL_APP_ROLE ) " bash -lc 'cd /app/backend && $( KAMAL_PYTHON) scripts/verify_h3_index.py'"
514649
650+ # Clear API cache on Kamal via admin endpoint (defaults to search cache).
651+ # Usage:
652+ # make kamal-clear-cache
653+ # make kamal-clear-cache KAMAL_CACHE_TYPE=all
654+ # make kamal-clear-cache KAMAL_CACHE_TYPE=suggest
655+ kamal-clear-cache :
656+ @echo " Clearing remote cache on Kamal (role: $( KAMAL_APP_ROLE) , cache_type: $( KAMAL_CACHE_TYPE) )..."
657+ @if [ -z " $$ KAMAL_SSH_USER" ] || [ -z " $$ KAMAL_HOST" ]; then \
658+ echo " ERROR: KAMAL_SSH_USER and KAMAL_HOST environment variables must be set." ; \
659+ echo " Please source .kamal/secrets first." ; \
660+ exit 1; \
661+ fi
662+ @kamal app exec --roles $(KAMAL_APP_ROLE ) " bash -lc 'ADMIN_USER=\$ ${ADMIN_USERNAME:- admin} ; ADMIN_PASS=\$ ${ADMIN_PASSWORD:- changeme} ; API_BASE=\" $( KAMAL_API_URL) \" ; if [ -z \"\$ $API_BASE \" ]; then API_BASE=\"\$ $APPLICATION_URL \" ; fi; if [ -z \"\$ $API_BASE \" ]; then echo \" ERROR: KAMAL_API_URL or APPLICATION_URL must be set.\" ; exit 1; fi; API_BASE=\"\$ ${API_BASE%/ } \" ; curl -fsS -u \"\$ $ADMIN_USER :\$ $ADMIN_PASS \" -X POST \"\$ $API_BASE /api/v1/admin/cache/clear?cache_type=$( KAMAL_CACHE_TYPE) \" '"
663+ @echo
664+ @echo " Remote cache clear request submitted."
665+
515666# (Old index_missing_resources target removed; resilient reindex handles verification/repair)
516667
517668# Frontend (Docker dev): clear Vite cache and restart dev server.
0 commit comments