From c037dcc03b75bbbed652cf0382cf82522394669a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 14:00:23 -0400 Subject: [PATCH 01/77] add a deprefix helper and some unit tests Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 26 +++++++++++++++++++ .../test_config/test_environment.py | 17 ++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 collectoss/application/environment.py create mode 100644 tests/test_application/test_config/test_environment.py diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py new file mode 100644 index 000000000..4b737912f --- /dev/null +++ b/collectoss/application/environment.py @@ -0,0 +1,26 @@ + +def _deprefix(key: str, prefixes: list[str], separator = "_") -> str: + """Remove a prefix from the provided key + + + Args: + key (str): the key to remove the prefix from + prefixes (list[str]): the prefixes to look for + separator (str, optional): the separator between elements of the key to also remove (if they would otherwise be dangling). Defaults to "_". 
+ + Returns: + str: The key value with the prefix removed if possible, otherwise returns the value of `key` + """ + unprefixed = None + for p in prefixes: + p = p.upper() + k = key.upper() + if k.startswith(p): + unprefixed = key[len(p):] + + if unprefixed.startswith(separator): + unprefixed = unprefixed[len(separator):] + return unprefixed + return key + + diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py new file mode 100644 index 000000000..bca34d92c --- /dev/null +++ b/tests/test_application/test_config/test_environment.py @@ -0,0 +1,17 @@ +from collectoss.application.environment import SystemEnv, _deprefix +import logging + +logger = logging.getLogger(__name__) + +prefixes = ["COLLECTOSS", "OTHER"] + +def test_env_deprefix(): + assert _deprefix("OTHER_DB", prefixes) == "DB" + assert _deprefix("COLLECTOSS_DB", prefixes) == "DB" + +def test_env_deprefix_default(): + assert _deprefix("SOME_DB", prefixes) == "SOME_DB" + assert _deprefix("THINGY_DB", prefixes) == "THINGY_DB" + +def test_env_deprefix_unprefixed(): + assert _deprefix("DB", prefixes) == "DB" From d62214b90ed0c8f8c7068ee206b0ca91ca79cb9b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 14:16:24 -0400 Subject: [PATCH 02/77] refactor into a better prefix extraction helper Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 21 +++++++++---------- .../test_config/test_environment.py | 19 +++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 4b737912f..4bb02b631 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -1,7 +1,7 @@ +from typing import Optional -def _deprefix(key: str, prefixes: list[str], separator = "_") -> str: - """Remove a prefix from the provided key - +def extract_prefix(key: str, prefixes: list[str], separator = "_") -> 
Optional[str]: + """Detect and return the prefix present on the provided key Args: key (str): the key to remove the prefix from @@ -9,18 +9,17 @@ def _deprefix(key: str, prefixes: list[str], separator = "_") -> str: separator (str, optional): the separator between elements of the key to also remove (if they would otherwise be dangling). Defaults to "_". Returns: - str: The key value with the prefix removed if possible, otherwise returns the value of `key` + str: The detected prefix (including any separators) if any, otherwise None """ - unprefixed = None + prefix_len = 0 for p in prefixes: p = p.upper() k = key.upper() if k.startswith(p): - unprefixed = key[len(p):] - - if unprefixed.startswith(separator): - unprefixed = unprefixed[len(separator):] - return unprefixed - return key + prefix_len += len(p) + if k[prefix_len] == separator: + prefix_len += len(separator) + return key[0:prefix_len] + return None diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py index bca34d92c..d3248412c 100644 --- a/tests/test_application/test_config/test_environment.py +++ b/tests/test_application/test_config/test_environment.py @@ -1,17 +1,18 @@ -from collectoss.application.environment import SystemEnv, _deprefix +from collectoss.application.environment import SystemEnv, extract_prefix import logging logger = logging.getLogger(__name__) prefixes = ["COLLECTOSS", "OTHER"] -def test_env_deprefix(): - assert _deprefix("OTHER_DB", prefixes) == "DB" - assert _deprefix("COLLECTOSS_DB", prefixes) == "DB" +def test_env_extract_prefix(): + assert extract_prefix("OTHER_DB", prefixes) == "OTHER_" + assert extract_prefix("COLLECTOSS_DB", prefixes) == "COLLECTOSS_" -def test_env_deprefix_default(): - assert _deprefix("SOME_DB", prefixes) == "SOME_DB" - assert _deprefix("THINGY_DB", prefixes) == "THINGY_DB" +def test_env_extract_prefix_default(): + assert extract_prefix("SOME_DB", prefixes) is None + assert 
extract_prefix("THINGY_DB", prefixes) is None -def test_env_deprefix_unprefixed(): - assert _deprefix("DB", prefixes) == "DB" + +def test_env_extract_prefix_unprefixed(): + assert extract_prefix("DB", prefixes) is None From 4a3d431733320f6474f6e99aad1fb71cd2219375 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 14:21:42 -0400 Subject: [PATCH 03/77] add first pass SystemEnv Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 4bb02b631..3bf9fdb3e 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -1,4 +1,9 @@ from typing import Optional +import os +import warnings +import logging + +logger = logging.getLogger(__name__) def extract_prefix(key: str, prefixes: list[str], separator = "_") -> Optional[str]: """Detect and return the prefix present on the provided key @@ -23,3 +28,35 @@ def extract_prefix(key: str, prefixes: list[str], separator = "_") -> Optional[s return key[0:prefix_len] return None + +class SystemEnv: + """Centralized environment variable access + Built for enabling migration of environment variable names + """ + + _prefixes = ["COLLECTOSS", "AUGUR"] + _warn_prefixes = ["AUGUR"] + _separator = "_" + + @classmethod + def get(cls, key: str, default = None) -> Optional[str]: + # extract the suffix so we can try multiple prefixes + canonical_prefix = extract_prefix(key, cls._prefixes, cls._separator) + suffix = key[len(canonical_prefix):] if canonical_prefix is not None else key + # check prefixes in order and use the first one that has a value + for p in cls._prefixes: + check_key = f"{p}{cls._separator}{suffix}" + value = os.getenv(check_key, None) + + if value is not None: + # emit a warning if configured + if p in cls._warn_prefixes: + msg = ( + f"Environment variable '{check_key}' is deprecated. 
" + f"Use '{key}' instead. Automatic recovery may be removed in a future version" + ) + logger.warning(msg) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + return value + return default From 61a564f2315d102f529bcd75c2f17461d64431ea Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 14:39:55 -0400 Subject: [PATCH 04/77] allow different prefixes to be passed in for testing purposes Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 3bf9fdb3e..7cadc0dfc 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -39,12 +39,12 @@ class SystemEnv: _separator = "_" @classmethod - def get(cls, key: str, default = None) -> Optional[str]: + def get(cls, key: str, default = None, prefixes = _prefixes) -> Optional[str]: # extract the suffix so we can try multiple prefixes - canonical_prefix = extract_prefix(key, cls._prefixes, cls._separator) + canonical_prefix = extract_prefix(key, prefixes, cls._separator) suffix = key[len(canonical_prefix):] if canonical_prefix is not None else key # check prefixes in order and use the first one that has a value - for p in cls._prefixes: + for p in prefixes: check_key = f"{p}{cls._separator}{suffix}" value = os.getenv(check_key, None) From e8cc02fb398845ba7dc1ba082f1e0e6eec8715fc Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 14:40:08 -0400 Subject: [PATCH 05/77] update deprecation message Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 7cadc0dfc..ae9d81bb7 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -53,7 +53,7 @@ def get(cls, key: str, default = None, 
prefixes = _prefixes) -> Optional[str]: if p in cls._warn_prefixes: msg = ( f"Environment variable '{check_key}' is deprecated. " - f"Use '{key}' instead. Automatic recovery may be removed in a future version" + f"Use '{key}' instead. This automatic recovery may become a failure in a future version " ) logger.warning(msg) warnings.warn(msg, DeprecationWarning, stacklevel=2) From b2f99caa89e28cf79f716ab29d952bc39b72ab44 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 14:42:44 -0400 Subject: [PATCH 06/77] basic functionality unit tests Signed-off-by: Adrian Edwards --- .../test_config/test_environment.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py index d3248412c..11c1e65aa 100644 --- a/tests/test_application/test_config/test_environment.py +++ b/tests/test_application/test_config/test_environment.py @@ -1,5 +1,6 @@ from collectoss.application.environment import SystemEnv, extract_prefix import logging +import os logger = logging.getLogger(__name__) @@ -16,3 +17,23 @@ def test_env_extract_prefix_default(): def test_env_extract_prefix_unprefixed(): assert extract_prefix("DB", prefixes) is None + +def test_fetching_env(): + # plain + os.environ["COLLECTOSS_NAME"] = "A" + assert SystemEnv.get("COLLECTOSS_NAME") == "A" + + # fallback handling + os.environ["OTHER_THING"] = "B" + assert SystemEnv.get("COLLECTOSS_THING", None, prefixes) == "B" + + # cleanup + del os.environ["COLLECTOSS_NAME"] + del os.environ["OTHER_THING"] + +def test_fetching_env_no_value(): + assert SystemEnv.get("COLLECTOSS_MISSING", None, prefixes) is None + +def test_fetching_env_default(): + assert SystemEnv.get("COLLECTOSS_DEFAULT", "SOME", prefixes) == "SOME" + From 7a737fc8f48f6f3e575b1c3527baaf696d8f3e21 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:00:36 -0400 Subject: [PATCH 07/77] handle cases with no known 
prefix to avoid breaking stuff Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 4 ++++ tests/test_application/test_config/test_environment.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index ae9d81bb7..c2eca8b15 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -59,4 +59,8 @@ def get(cls, key: str, default = None, prefixes = _prefixes) -> Optional[str]: warnings.warn(msg, DeprecationWarning, stacklevel=2) return value + + if not canonical_prefix: + return os.getenv(key, default) + return default diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py index 11c1e65aa..3b31bf950 100644 --- a/tests/test_application/test_config/test_environment.py +++ b/tests/test_application/test_config/test_environment.py @@ -37,3 +37,8 @@ def test_fetching_env_no_value(): def test_fetching_env_default(): assert SystemEnv.get("COLLECTOSS_DEFAULT", "SOME", prefixes) == "SOME" +def test_no_known_prefix(): + # fallback handling + os.environ["THING"] = "C" + assert SystemEnv.get("THING", None, prefixes) == "C" + From f6109aa87b572f783408b6f303e5c7cab41cb808 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:10:22 -0400 Subject: [PATCH 08/77] factor out bool fetching class to deduplicate logic Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 5 ++++ .../test_config/test_environment.py | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index c2eca8b15..33bb0a7f5 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -64,3 +64,8 @@ def get(cls, key: str, default = None, prefixes = _prefixes) -> Optional[str]: return os.getenv(key, default) return default + + @classmethod + def 
get_bool(cls, key:str, default: bool, prefixes = _prefixes) -> bool: + raw_val = cls.get(key, None, prefixes) + return raw_val.lower() in ('true', '1', 't', 'y', 'yes') if raw_val else default diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py index 3b31bf950..aa00bcb41 100644 --- a/tests/test_application/test_config/test_environment.py +++ b/tests/test_application/test_config/test_environment.py @@ -42,3 +42,32 @@ def test_no_known_prefix(): os.environ["THING"] = "C" assert SystemEnv.get("THING", None, prefixes) == "C" + +def test_get_bool_trues(): + + cases = ["1", "true", "True", "TRUE", "y", "Y", "yes", "Yes"] + + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == True + del os.environ["OTHER_BOOL"] + +def test_get_bool_falses(): + + cases = ["0", "false", "False", "FALSE", "n", "N", "no", "No"] + + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", True, prefixes) == False + del os.environ["OTHER_BOOL"] + +def test_get_bool_default(): + + cases = ["?", "maybe", "Stuff", "333"] + + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == False + del os.environ["OTHER_BOOL"] + + From 67ddbfd930b59a1ab1a72c3d3bf5c563c63e7e42 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:20:12 -0400 Subject: [PATCH 09/77] Test to make sure we can potentially migrate the AUGUR_ vars in the code slowly over time Signed-off-by: Adrian Edwards --- tests/test_application/test_config/test_environment.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py index aa00bcb41..6b62f2ec9 100644 --- a/tests/test_application/test_config/test_environment.py +++ b/tests/test_application/test_config/test_environment.py @@ 
-31,6 +31,13 @@ def test_fetching_env(): del os.environ["COLLECTOSS_NAME"] del os.environ["OTHER_THING"] +def test_fetching_env_backwards(): + os.environ["COLLECTOSS_NAME"] = "A" + assert SystemEnv.get("OTHER_NAME", None, prefixes) == "A" + + # cleanup + del os.environ["COLLECTOSS_NAME"] + def test_fetching_env_no_value(): assert SystemEnv.get("COLLECTOSS_MISSING", None, prefixes) is None From ddb0ef55cd7a56327ab7be124104c5193ad5af70 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:20:28 -0400 Subject: [PATCH 10/77] replace all references to os.getenv with new class Signed-off-by: Adrian Edwards --- collectoss/api/gunicorn_conf.py | 8 +++++--- collectoss/application/cli/__init__.py | 8 +++++--- collectoss/application/cli/api.py | 4 +++- collectoss/application/cli/backend.py | 11 ++++++----- collectoss/application/cli/collection.py | 3 ++- collectoss/application/cli/db.py | 5 +++-- collectoss/application/config.py | 6 ++++-- collectoss/tasks/git/dependency_tasks/core.py | 3 ++- collectoss/tasks/git/scc_value_tasks/core.py | 3 ++- .../git/util/facade_worker/facade_worker/config.py | 4 +++- 10 files changed, 35 insertions(+), 20 deletions(-) diff --git a/collectoss/api/gunicorn_conf.py b/collectoss/api/gunicorn_conf.py index 22c11231a..ee7797471 100644 --- a/collectoss/api/gunicorn_conf.py +++ b/collectoss/api/gunicorn_conf.py @@ -7,6 +7,7 @@ from collectoss.application.db.lib import get_value from collectoss.application.db import dispose_database_engine +from collectoss.application.environment import SystemEnv logger = logging.getLogger(__name__) @@ -20,8 +21,8 @@ workers = multiprocessing.cpu_count() * 2 + 1 umask = 0o007 reload = True - -is_dev = os.getenv("AUGUR_DEV", 'False').lower() in ('true', '1', 't', 'y', 'yes') +# this satisfies the type checker +is_dev = SystemEnv.get_bool("AUGUR_DEV", False) if is_dev: @@ -40,7 +41,8 @@ # set the log location for gunicorn logs_directory = get_value('Logging', 'logs_directory') -is_docker = 
os.getenv("AUGUR_DOCKER_DEPLOY", 'False').lower() in ('true', '1', 't', 'y', 'yes') +# this syntax satisfies the type checker +is_docker = SystemEnv.get_bool("AUGUR_DOCKER_DEPLOY", False) accesslog = f"{logs_directory}/gunicorn.log" errorlog = f"{logs_directory}/gunicorn.log" diff --git a/collectoss/application/cli/__init__.py b/collectoss/application/cli/__init__.py index 8081d6a8e..b398614e2 100644 --- a/collectoss/application/cli/__init__.py +++ b/collectoss/application/cli/__init__.py @@ -10,7 +10,9 @@ from collectoss.application.db.engine import DatabaseEngine from collectoss.application.db import get_engine, dispose_database_engine -from sqlalchemy.exc import OperationalError +from sqlalchemy.exc import OperationalError +from collectoss.application.environment import SystemEnv + def check_connectivity(urls=["http://chaoss.community", "http://github.com", "http://gitlab.com"], timeout=10.0): @@ -65,11 +67,11 @@ def new_func(ctx, *args, **kwargs): return ctx.invoke(function_db_connection, *args, **kwargs) except OperationalError as e: - db_environment_var = os.getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("AUGUR_DB") # determine the location to print in error string if db_environment_var: - location = f"the AUGUR_DB environment variable\nAUGUR_DB={os.getenv('AUGUR_DB')}" + location = f"the AUGUR_DB environment variable\nAUGUR_DB={SystemEnv.get('AUGUR_DB')}" else: with open("db.config.json", 'r') as f: db_config = json.load(f) diff --git a/collectoss/application/cli/api.py b/collectoss/application/cli/api.py index a8bb9e53b..70fe3a6a1 100644 --- a/collectoss/application/cli/api.py +++ b/collectoss/application/cli/api.py @@ -17,6 +17,8 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext from collectoss.application.cli._cli_util import _broadcast_signal_to_processes, raise_open_file_limit, clear_redis_caches, clear_rabbitmq_messages from collectoss.application.db.lib import get_value +from 
collectoss.application.environment import SystemEnv + logger = SystemLogger("collectoss", reset_logfiles=False).get_logger() @@ -142,7 +144,7 @@ def get_api_processes(): def is_api_process(process): command = ''.join(process.info['cmdline'][:]).lower() - if os.getenv('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: + if SystemEnv.get('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: if process.pid != os.getpid(): diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 8add0ce18..3c5f381c4 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -15,6 +15,7 @@ import requests from redis.exceptions import ConnectionError as RedisConnectionError +from collectoss.application.environment import SystemEnv from collectoss.tasks.start_tasks import collection_monitor, create_collection_status_records from collectoss.tasks.git.facade_tasks import clone_repos from collectoss.tasks.github.contributors import process_contributors @@ -31,7 +32,7 @@ from keyman.KeyClient import KeyClient, KeyPublisher -reset_logs = os.getenv("AUGUR_RESET_LOGS", 'True').lower() in ('true', '1', 't', 'y', 'yes') +reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) logger = SystemLogger("collectoss", reset_logfiles=reset_logs).get_logger() @@ -130,7 +131,7 @@ def start(ctx, disable_collection, development, pidfile, port): processes = start_celery_worker_processes((core_worker_count, secondary_worker_count, facade_worker_count), disable_collection) manager.processes = processes - celery_beat_schedule_db = os.getenv("CELERYBEAT_SCHEDULE_DB", "celerybeat-schedule.db") + celery_beat_schedule_db = SystemEnv.get("CELERYBEAT_SCHEDULE_DB", "celerybeat-schedule.db") if os.path.exists(celery_beat_schedule_db): logger.info("Deleting old task schedule") os.remove(celery_beat_schedule_db) @@ -355,10 +356,10 @@ def export_env(config): Exports your GitHub key and 
database credentials """ - export_file = open(os.getenv('AUGUR_EXPORT_FILE', 'collectoss_export_env.sh'), 'w+') + export_file = open(SystemEnv.get('AUGUR_EXPORT_FILE') or 'collectoss_export_env.sh', 'w+') export_file.write('#!/bin/bash') export_file.write('\n') - env_file = open(os.getenv('AUGUR_ENV_FILE', 'docker_env.txt'), 'w+') + env_file = open(SystemEnv.get('AUGUR_ENV_FILE') or 'docker_env.txt', 'w+') for env_var in config.get_env_config().items(): if "LOG" not in env_var[0]: @@ -403,7 +404,7 @@ def get_backend_processes(): for process in psutil.process_iter(['cmdline', 'name', 'environ']): if process.info['cmdline'] is not None and process.info['environ'] is not None: try: - if os.getenv('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in ''.join(process.info['cmdline'][:]).lower(): + if SystemEnv.get('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in ''.join(process.info['cmdline'][:]).lower(): if process.pid != os.getpid(): process_list.append(process) except (KeyError, FileNotFoundError): diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index 78b6f5d13..c502dc91e 100644 --- a/collectoss/application/cli/collection.py +++ b/collectoss/application/cli/collection.py @@ -14,6 +14,7 @@ import traceback import sqlalchemy as s +from collectoss.application.environment import SystemEnv from collectoss.tasks.start_tasks import collection_monitor, create_collection_status_records from collectoss.tasks.git.facade_tasks import clone_repos from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler @@ -237,7 +238,7 @@ def get_collection_processes(): def is_collection_process(process): command = ''.join(process.info['cmdline'][:]).lower() - if os.getenv('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: + if SystemEnv.get('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: if process.pid != 
os.getpid(): if "collectossbackendcollection" in command or "celery_app.celery_appbeat" in command: diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index 25ea8a88e..fe2250742 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -28,6 +28,7 @@ process_repo_csv, process_repo_group_csv, ) +from collectoss.application.environment import SystemEnv logger = logging.getLogger(__name__) @@ -379,7 +380,7 @@ def get_api_key(ctx): short_help="Check the ~/.pgpass file for CollectOSS's database credentials", ) def check_pgpass(): - db_environment_var = getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("AUGUR_DB") if db_environment_var: # gets the user, passowrd, host, port, and database_name out of environment variable # assumes database string of structure //:@:/ @@ -495,7 +496,7 @@ def run_psql_command_in_database(target_type, target): logger.error("Invalid target type. Exiting...") exit(1) - db_environment_var = getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("AUGUR_DB") # db_json_file_location = os.getcwd() + "/db.config.json" # db_json_exists = os.path.exists(db_json_file_location) diff --git a/collectoss/application/config.py b/collectoss/application/config.py index 56e6c57ae..051235323 100644 --- a/collectoss/application/config.py +++ b/collectoss/application/config.py @@ -7,6 +7,8 @@ from collectoss.application.db.models import Config from collectoss.application.db.util import execute_session_query, convert_type_of_value from pathlib import Path +from collectoss.application.environment import SystemEnv + import logging def get_development_flag_from_config(): @@ -27,7 +29,7 @@ def get_development_flag_from_config(): return flag def get_development_flag(): - return os.getenv("AUGUR_DEV") or get_development_flag_from_config() or False + return SystemEnv.get("AUGUR_DEV") or get_development_flag_from_config() or False def redact_setting_value(section_name, setting_name, value): value_redacted = 
value if section_name != "Keys" else "REDACTED" @@ -167,7 +169,7 @@ def __init__(self, logger, session: DatabaseSession, config_sources: list = None JsonConfig(default_config, logger) ] - config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_dir = Path(SystemEnv.get("CONFIG_DATADIR") or "./") config_path = config_dir.joinpath("augur.json") if config_path.exists(): config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index a9e74b4e1..0b713de93 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -2,6 +2,7 @@ import os from collectoss.application.db.models import * from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value, get_session +from collectoss.application.environment import SystemEnv from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from collectoss.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call @@ -79,7 +80,7 @@ def generate_scorecard(logger, repo_git): command = '--repo=' + path #this is path where our scorecard project is located - path_to_scorecard = os.getenv('SCORECARD_DIR', os.environ['HOME'] + '/scorecard') + path_to_scorecard = SystemEnv.get('SCORECARD_DIR', os.environ['HOME'] + '/scorecard') #setting the environmental variable which is required by scorecard diff --git a/collectoss/tasks/git/scc_value_tasks/core.py b/collectoss/tasks/git/scc_value_tasks/core.py index 7c9e0bafd..a526af990 100644 --- a/collectoss/tasks/git/scc_value_tasks/core.py +++ b/collectoss/tasks/git/scc_value_tasks/core.py @@ -2,6 +2,7 @@ import os from collectoss.application.db.models import * from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value 
+from collectoss.application.environment import SystemEnv from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call from collectoss.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path @@ -20,7 +21,7 @@ def value_model(logger,repo_git): logger.info(f"Repo ID: {repo_id}, Path: {path}") logger.info('Running scc...') - path_to_scc = os.getenv('SCC_DIR', os.environ['HOME'] + '/scc') + path_to_scc = SystemEnv.get('SCC_DIR', (SystemEnv.get('HOME') or "~") + '/scc') required_output = parse_json_from_subprocess_call(logger,['./scc', '-f','json','--by-file', path], cwd=path_to_scc) diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py index 7da6495bd..9db7d8866 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py @@ -40,11 +40,13 @@ from collectoss.application.db.lib import execute_sql from logging import Logger +from collectoss.application.environment import SystemEnv + logger = logging.getLogger(__name__) def get_database_args_from_env(): - db_str = os.getenv("AUGUR_DB") + db_str = SystemEnv.get("AUGUR_DB") try: db_json_file_location = os.getcwd() + "/db.config.json" except FileNotFoundError: From 5b04de6ab9b28cfdeecae1f62c0ad02db3bccc81 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:20:48 -0400 Subject: [PATCH 11/77] deprecate older Environment class that is buried in the module tree Signed-off-by: Adrian Edwards --- collectoss/api/view/server/Environment.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/collectoss/api/view/server/Environment.py b/collectoss/api/view/server/Environment.py index 76b8207ca..4d35bc70a 100644 --- a/collectoss/api/view/server/Environment.py +++ b/collectoss/api/view/server/Environment.py @@ -1,4 +1,5 @@ import os +from typing_extensions import deprecated class Environment: """ 
@@ -7,16 +8,19 @@ class Environment: with subscript notation without needing to deal with the particularities of non-existent values. """ + @deprecated("use collectoss.application.environment.SystemEnv instead") def __init__(self, **kwargs): for (key, value) in kwargs.items(): self[key] = value + @deprecated("use collectoss.application.environment.SystemEnv instead") def setdefault(self, key, value): if not self[key]: self[key] = value return value return self[key] + @deprecated("use collectoss.application.environment.SystemEnv instead") def setall(self, **kwargs): result = {} for (key, value) in kwargs.items(): @@ -24,6 +28,7 @@ def setall(self, **kwargs): result[key] = self[key] self[key] = value + @deprecated("use collectoss.application.environment.SystemEnv instead") def getany(self, *args): result = {} for arg in args: @@ -31,6 +36,7 @@ def getany(self, *args): result[arg] = self[arg] return result + @deprecated("use collectoss.application.environment.SystemEnv instead") def as_type(self, type, key): if self[key]: return type(self[key]) From 827dfb9adb3307e58ddffbd8fa1ca200b4ec7f52 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:27:24 -0400 Subject: [PATCH 12/77] get_bool docstring Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 33bb0a7f5..462386f72 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -67,5 +67,7 @@ def get(cls, key: str, default = None, prefixes = _prefixes) -> Optional[str]: @classmethod def get_bool(cls, key:str, default: bool, prefixes = _prefixes) -> bool: + """gets a value from the environment and cast it to a boolean + """ raw_val = cls.get(key, None, prefixes) return raw_val.lower() in ('true', '1', 't', 'y', 'yes') if raw_val else default From 1963d3592041feea03ea233ac85ca65693560db8 Mon Sep 17 00:00:00 2001 
From: Adrian Edwards Date: Fri, 8 May 2026 15:27:34 -0400 Subject: [PATCH 13/77] basic setter Signed-off-by: Adrian Edwards --- collectoss/application/environment.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 462386f72..3a28c12a9 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -71,3 +71,10 @@ def get_bool(cls, key:str, default: bool, prefixes = _prefixes) -> bool: """ raw_val = cls.get(key, None, prefixes) return raw_val.lower() in ('true', '1', 't', 'y', 'yes') if raw_val else default + + @classmethod + def set(cls, key: str, value: str, overwrite=True) -> None: + if os.getenv(key) is not None and not overwrite: + return + + os.environ[key] = value \ No newline at end of file From 96509061c67168273220a5d8ac62d3924d101290 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:32:05 -0400 Subject: [PATCH 14/77] replace references to os.environ[] with new class Signed-off-by: Adrian Edwards --- collectoss/api/routes/auggie.py | 4 +++- collectoss/application/cli/api.py | 2 +- collectoss/application/cli/backend.py | 8 ++++---- collectoss/application/cli/collection.py | 2 +- collectoss/tasks/git/dependency_tasks/core.py | 4 ++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/collectoss/api/routes/auggie.py b/collectoss/api/routes/auggie.py index 18642498f..f02122ae3 100644 --- a/collectoss/api/routes/auggie.py +++ b/collectoss/api/routes/auggie.py @@ -14,6 +14,8 @@ import requests import slack +from collectoss.application.environment import SystemEnv + from ..server import app @@ -326,7 +328,7 @@ def slack_login(): print("slack_login") r = requests.get( - url=f'https://slack.com/api/oauth.v2.access?code={body["code"]}&client_id={os.environ["AUGGIE_CLIENT_ID"]}&client_secret={os.environ["AUGGIE_CLIENT_SECRET"]}&redirect_uri=http%3A%2F%2Flocalhost%3A8080') + 
url=f'https://slack.com/api/oauth.v2.access?code={body["code"]}&client_id={SystemEnv.get("AUGGIE_CLIENT_ID")}&client_secret={SystemEnv.get("AUGGIE_CLIENT_SECRET")}&redirect_uri=http%3A%2F%2Flocalhost%3A8080') data = r.json() if (data["ok"]): diff --git a/collectoss/application/cli/api.py b/collectoss/application/cli/api.py index 70fe3a6a1..e3e4a5a55 100644 --- a/collectoss/application/cli/api.py +++ b/collectoss/application/cli/api.py @@ -48,7 +48,7 @@ def start(ctx, development, port): raise e if development: - os.environ["AUGUR_DEV"] = "1" + SystemEnv.set("AUGUR_DEV", "1") logger.info("Starting in development mode") try: diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 3c5f381c4..edffccc1f 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -72,10 +72,10 @@ def start(ctx, disable_collection, development, pidfile, port): raise e if development: - os.environ["AUGUR_DEV"] = "1" + SystemEnv.set("AUGUR_DEV", "1") logger.info("Starting in development mode") - os.environ["AUGUR_PIDFILE"] = pidfile + SystemEnv.set("AUGUR_PIDFILE", pidfile) try: gunicorn_location = os.getcwd() + "/collectoss/api/gunicorn_conf.py" @@ -87,10 +87,10 @@ def start(ctx, disable_collection, development, pidfile, port): if not port: port = get_value("Server", "port") - os.environ["AUGUR_PORT"] = str(port) + SystemEnv.set("AUGUR_PORT", str(port)) if disable_collection: - os.environ["AUGUR_DISABLE_COLLECTION"] = "1" + SystemEnv.set("AUGUR_DISABLE_COLLECTION", "1") core_worker_count = get_value("Celery", 'core_worker_count') secondary_worker_count = get_value("Celery", 'secondary_worker_count') diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index c502dc91e..369a8de40 100644 --- a/collectoss/application/cli/collection.py +++ b/collectoss/application/cli/collection.py @@ -76,7 +76,7 @@ def start(ctx, development): keypub.publish(key, "gitlab_rest") if 
development: - os.environ["AUGUR_DEV"] = "1" + SystemEnv.set("AUGUR_DEV", "1") logger.info("Starting in development mode") core_worker_count = get_value("Celery", 'core_worker_count') diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index 0b713de93..0e5c55b9e 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -80,14 +80,14 @@ def generate_scorecard(logger, repo_git): command = '--repo=' + path #this is path where our scorecard project is located - path_to_scorecard = SystemEnv.get('SCORECARD_DIR', os.environ['HOME'] + '/scorecard') + path_to_scorecard = SystemEnv.get('SCORECARD_DIR', (SystemEnv.get('HOME') or "~") + '/scorecard') #setting the environmental variable which is required by scorecard with get_session() as session: #key_handler = GithubRandomKeyAuth(logger) key_handler = GithubApiKeyHandler(logger) - os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key() + SystemEnv.set('GITHUB_AUTH_TOKEN', key_handler.get_random_key()) # This seems outdated #setting the environmental variable which is required by scorecard From dc160799ad2b5c0df5fdb5404fbfc66591add081 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 15:32:24 -0400 Subject: [PATCH 15/77] remove some redundant wrapping code Signed-off-by: Adrian Edwards --- collectoss/tasks/git/dependency_tasks/core.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index 0e5c55b9e..21f24246a 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -83,16 +83,8 @@ def generate_scorecard(logger, repo_git): path_to_scorecard = SystemEnv.get('SCORECARD_DIR', (SystemEnv.get('HOME') or "~") + '/scorecard') #setting the environmental variable which is required by scorecard - - with get_session() as 
session: - #key_handler = GithubRandomKeyAuth(logger) - key_handler = GithubApiKeyHandler(logger) - SystemEnv.set('GITHUB_AUTH_TOKEN', key_handler.get_random_key()) - - # This seems outdated - #setting the environmental variable which is required by scorecard - #key_handler = GithubApiKeyHandler(session, session.logger) - #os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key() + key_handler = GithubApiKeyHandler(logger) + SystemEnv.set('GITHUB_AUTH_TOKEN', key_handler.get_random_key()) try: required_output = parse_json_from_subprocess_call(logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard) From 1bb1de287ce5b2155a8deb2ad6d46d3ad96b44f7 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 16:28:10 -0400 Subject: [PATCH 16/77] replace references to os.environ.get Signed-off-by: Adrian Edwards --- collectoss/api/routes/auggie.py | 6 +++--- collectoss/application/cli/api.py | 2 +- collectoss/application/cli/backend.py | 4 ++-- collectoss/application/cli/collection.py | 2 +- collectoss/tasks/init/celery_app.py | 2 +- collectoss/tasks/start_tasks.py | 5 +++-- keyman/Orchestrator.py | 5 +++-- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/collectoss/api/routes/auggie.py b/collectoss/api/routes/auggie.py index f02122ae3..6d036045a 100644 --- a/collectoss/api/routes/auggie.py +++ b/collectoss/api/routes/auggie.py @@ -254,7 +254,7 @@ def get_auggie_user(): # return Response(response=response, status=200, mimetype="application/json") ## From Method profile_name = 'collectoss' - if os.environ.get('AUGUR_IS_PROD'): + if SystemEnv.get('AUGUR_IS_PROD'): profile_name = 'default' client = boto3.Session(region_name='us-east-1', profile_name=profile_name).client('dynamodb') response = client.get_item( @@ -280,7 +280,7 @@ def update_auggie_user_tracking(): # return Response(response=response, status=200, mimetype="application/json") ## From Method profile_name = 'collectoss' - if os.environ.get('AUGUR_IS_PROD'): + if 
SystemEnv.get('AUGUR_IS_PROD'): profile_name = 'default' client = boto3.Session(region_name='us-east-1', profile_name=profile_name).client('dynamodb') response = client.update_item( @@ -342,7 +342,7 @@ def slack_login(): email = user_response["user"]["email"] profile_name = 'collectoss' - if os.environ.get('AUGUR_IS_PROD'): + if SystemEnv.get('AUGUR_IS_PROD'): profile_name = 'default' print("Making Boto3 Session") client = boto3.Session(region_name='us-east-1', diff --git a/collectoss/application/cli/api.py b/collectoss/application/cli/api.py index e3e4a5a55..4f7077a78 100644 --- a/collectoss/application/cli/api.py +++ b/collectoss/application/cli/api.py @@ -38,7 +38,7 @@ def start(ctx, development, port): """Start CollectOSS's backend server.""" try: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index edffccc1f..e163ad366 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -62,7 +62,7 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGINT, manager.shutdown_signal_handler) try: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( @@ -145,7 +145,7 @@ def start(ctx, disable_collection, development, pidfile, port): manager.keypub = keypub if not disable_collection: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split()) # Wait for orchestrator startup diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index 369a8de40..c84d81907 100644 --- a/collectoss/application/cli/collection.py +++ 
b/collectoss/application/cli/collection.py @@ -46,7 +46,7 @@ def start(ctx, development): """Start CollectOSS's backend server.""" try: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( diff --git a/collectoss/tasks/init/celery_app.py b/collectoss/tasks/init/celery_app.py index e14230f99..a33e1e961 100644 --- a/collectoss/tasks/init/celery_app.py +++ b/collectoss/tasks/init/celery_app.py @@ -63,7 +63,7 @@ tasks = start_tasks + github_tasks + gitlab_tasks + git_tasks + materialized_view_tasks + frontend_tasks -if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": +if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": tasks += data_analysis_tasks redis_db_number, redis_conn_string = get_redis_conn_values() diff --git a/collectoss/tasks/start_tasks.py b/collectoss/tasks/start_tasks.py index 1f36dd90b..e5939f240 100644 --- a/collectoss/tasks/start_tasks.py +++ b/collectoss/tasks/start_tasks.py @@ -14,7 +14,8 @@ from collectoss.tasks.github.pull_requests.tasks import * from collectoss.tasks.github.repo_info.tasks import * from collectoss.tasks.github.releases.tasks import * -if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": +from collectoss.application.environment import SystemEnv +if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": from collectoss.tasks.data_analysis import * from collectoss.tasks.github.detect_move.tasks import detect_github_repo_move_core, detect_github_repo_move_secondary from collectoss.tasks.github.releases.tasks import collect_releases @@ -38,7 +39,7 @@ from collectoss.application.db.lib import execute_sql, get_session from collectoss.application.config import SystemConfig -RUNNING_DOCKER = os.environ.get('AUGUR_DOCKER_DEPLOY') == "1" +RUNNING_DOCKER = SystemEnv.get('AUGUR_DOCKER_DEPLOY') == "1" CELERY_GROUP_TYPE = type(group()) CELERY_CHAIN_TYPE = type(chain()) diff --git a/keyman/Orchestrator.py b/keyman/Orchestrator.py index 
71cfae8bb..d93a1f064 100644 --- a/keyman/Orchestrator.py +++ b/keyman/Orchestrator.py @@ -4,15 +4,16 @@ import time from keyman.KeyOrchestrationAPI import spec, WaitKeyTimeout, InvalidRequest +from collectoss.application.environment import SystemEnv -if os.environ.get("KEYMAN_DOCKER"): +if SystemEnv.get("KEYMAN_DOCKER"): import sys import redis import logging sys.path.append("/collectoss") - conn = redis.Redis.from_url(os.environ.get("REDIS_CONN_STRING")) + conn = redis.Redis.from_url(SystemEnv.get("REDIS_CONN_STRING")) # Just log to stdout if we're running in docker logger = logging.Logger("KeyOrchestrator") From f5b501923256bdb7bed333dde770136497f4897e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 16:38:47 -0400 Subject: [PATCH 17/77] Swap out only usage of deprecated Environment class Signed-off-by: Adrian Edwards --- collectoss/api/view/init.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/collectoss/api/view/init.py b/collectoss/api/view/init.py index ab4708793..1ab68912c 100644 --- a/collectoss/api/view/init.py +++ b/collectoss/api/view/init.py @@ -1,13 +1,11 @@ import os from pathlib import Path -from .server import Environment from collectoss.application.logs import SystemLogger import secrets, yaml - -env = Environment() +from collectoss.application.environment import SystemEnv # load configuration files and initialize globals -configFile = Path(env.setdefault("CONFIG_LOCATION", "config.yml")) +configFile = Path(SystemEnv.get("CONFIG_LOCATION") or "config.yml") settings = {} From 202465ccb9be01f53510df5eb8a030cf3f651a9c Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 8 May 2026 16:39:13 -0400 Subject: [PATCH 18/77] Remove no-longer-used Environment class in API Signed-off-by: Adrian Edwards --- collectoss/api/view/server/Environment.py | 58 ----------------------- collectoss/api/view/server/__init__.py | 3 +- 2 files changed, 1 insertion(+), 60 deletions(-) delete mode 100644 
collectoss/api/view/server/Environment.py diff --git a/collectoss/api/view/server/Environment.py b/collectoss/api/view/server/Environment.py deleted file mode 100644 index 4d35bc70a..000000000 --- a/collectoss/api/view/server/Environment.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -from typing_extensions import deprecated - -class Environment: - """ - This class is used to make dealing with environment variables easier. It - allows you to set multiple environment variables at once, and to get items - with subscript notation without needing to deal with the particularities of - non-existent values. - """ - @deprecated("use collectoss.application.environment.SystemEnv instead") - def __init__(self, **kwargs): - for (key, value) in kwargs.items(): - self[key] = value - - @deprecated("use collectoss.application.environment.SystemEnv instead") - def setdefault(self, key, value): - if not self[key]: - self[key] = value - return value - return self[key] - - @deprecated("use collectoss.application.environment.SystemEnv instead") - def setall(self, **kwargs): - result = {} - for (key, value) in kwargs.items(): - if self[key]: - result[key] = self[key] - self[key] = value - - @deprecated("use collectoss.application.environment.SystemEnv instead") - def getany(self, *args): - result = {} - for arg in args: - if self[arg]: - result[arg] = self[arg] - return result - - @deprecated("use collectoss.application.environment.SystemEnv instead") - def as_type(self, type, key): - if self[key]: - return type(self[key]) - return None - - def __getitem__(self, key): - return os.getenv(key) - - def __setitem__(self, key, value): - os.environ[key] = str(value) - - def __len__(self)-> int: - return len(os.environ) - - def __str__(self)-> str: - return str(os.environ) - - def __iter__(self): - return (item for item in os.environ.items()) \ No newline at end of file diff --git a/collectoss/api/view/server/__init__.py b/collectoss/api/view/server/__init__.py index e919a597a..98ce903be 100644 
--- a/collectoss/api/view/server/__init__.py +++ b/collectoss/api/view/server/__init__.py @@ -1,2 +1 @@ -from .LoginException import LoginException -from .Environment import Environment \ No newline at end of file +from .LoginException import LoginException \ No newline at end of file From da765da11a6c3ebd48a99985dc4d4f825f3cb877 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Fri, 29 May 2026 17:38:44 -0400 Subject: [PATCH 19/77] Refactor extract_prefix Co-authored-by: Shlok Gilda Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- collectoss/application/environment.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 3a28c12a9..9c5b3cf65 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -16,16 +16,13 @@ def extract_prefix(key: str, prefixes: list[str], separator = "_") -> Optional[s Returns: str: The detected prefix (including any separators) if any, otherwise None """ - prefix_len = 0 + k = key.upper() for p in prefixes: - p = p.upper() - k = key.upper() - if k.startswith(p): - prefix_len += len(p) - - if k[prefix_len] == separator: - prefix_len += len(separator) - return key[0:prefix_len] + p_up = p.upper() + if k == p_up: + return key[:len(p)] + if k.startswith(p_up + separator): + return key[:len(p) + len(separator)] return None From b3e92a2d42807b2ffc2afdf838cd560cbb810852 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Fri, 29 May 2026 17:39:06 -0400 Subject: [PATCH 20/77] refactor get_bool Co-authored-by: Shlok Gilda Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- collectoss/application/environment.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/collectoss/application/environment.py 
b/collectoss/application/environment.py index 9c5b3cf65..22a8c95d4 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -67,7 +67,9 @@ def get_bool(cls, key:str, default: bool, prefixes = _prefixes) -> bool: """gets a value from the environment and cast it to a boolean """ raw_val = cls.get(key, None, prefixes) - return raw_val.lower() in ('true', '1', 't', 'y', 'yes') if raw_val else default + if raw_val is None: + return default + return raw_val.lower() in ('true', '1', 't', 'y', 'yes') @classmethod def set(cls, key: str, value: str, overwrite=True) -> None: From 21a34f1a90633803b6db56f921897ce702192a34 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Fri, 29 May 2026 17:39:42 -0400 Subject: [PATCH 21/77] fix ~ path expansion in default scorecard value Co-authored-by: Shlok Gilda Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- collectoss/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index 21f24246a..3bd2aaab2 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -80,7 +80,7 @@ def generate_scorecard(logger, repo_git): command = '--repo=' + path #this is path where our scorecard project is located - path_to_scorecard = SystemEnv.get('SCORECARD_DIR', (SystemEnv.get('HOME') or "~") + '/scorecard') + path_to_scorecard = SystemEnv.get('SCORECARD_DIR', os.path.expanduser('~/scorecard')) #setting the environmental variable which is required by scorecard key_handler = GithubApiKeyHandler(logger) From 8945ef8d55899748f57b9085265394249dca0c5a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 20 May 2026 16:14:40 -0400 Subject: [PATCH 22/77] replace a bunch of env var names the application accesses impact: low due to the new env variable 
interpretation layer Signed-off-by: Adrian Edwards --- collectoss/api/routes/auggie.py | 6 +++--- collectoss/application/cli/__init__.py | 4 ++-- collectoss/application/cli/api.py | 2 +- collectoss/application/cli/backend.py | 8 ++++---- collectoss/application/cli/collection.py | 2 +- collectoss/application/cli/db.py | 4 ++-- collectoss/application/config.py | 2 +- .../tasks/git/util/facade_worker/facade_worker/config.py | 2 +- collectoss/tasks/init/celery_app.py | 2 +- collectoss/tasks/start_tasks.py | 4 ++-- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/collectoss/api/routes/auggie.py b/collectoss/api/routes/auggie.py index 6d036045a..4cde77084 100644 --- a/collectoss/api/routes/auggie.py +++ b/collectoss/api/routes/auggie.py @@ -254,7 +254,7 @@ def get_auggie_user(): # return Response(response=response, status=200, mimetype="application/json") ## From Method profile_name = 'collectoss' - if SystemEnv.get('AUGUR_IS_PROD'): + if SystemEnv.get('COLLECTOSS_IS_PROD'): profile_name = 'default' client = boto3.Session(region_name='us-east-1', profile_name=profile_name).client('dynamodb') response = client.get_item( @@ -280,7 +280,7 @@ def update_auggie_user_tracking(): # return Response(response=response, status=200, mimetype="application/json") ## From Method profile_name = 'collectoss' - if SystemEnv.get('AUGUR_IS_PROD'): + if SystemEnv.get('COLLECTOSS_IS_PROD'): profile_name = 'default' client = boto3.Session(region_name='us-east-1', profile_name=profile_name).client('dynamodb') response = client.update_item( @@ -342,7 +342,7 @@ def slack_login(): email = user_response["user"]["email"] profile_name = 'collectoss' - if SystemEnv.get('AUGUR_IS_PROD'): + if SystemEnv.get('COLLECTOSS_IS_PROD'): profile_name = 'default' print("Making Boto3 Session") client = boto3.Session(region_name='us-east-1', diff --git a/collectoss/application/cli/__init__.py b/collectoss/application/cli/__init__.py index b398614e2..18fac2f0a 100644 --- 
a/collectoss/application/cli/__init__.py +++ b/collectoss/application/cli/__init__.py @@ -67,11 +67,11 @@ def new_func(ctx, *args, **kwargs): return ctx.invoke(function_db_connection, *args, **kwargs) except OperationalError as e: - db_environment_var = SystemEnv.get("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") # determine the location to print in error string if db_environment_var: - location = f"the AUGUR_DB environment variable\nAUGUR_DB={SystemEnv.get('AUGUR_DB')}" + location = f"the AUGUR_DB environment variable\nAUGUR_DB={SystemEnv.get('COLLECTOSS_DB')}" else: with open("db.config.json", 'r') as f: db_config = json.load(f) diff --git a/collectoss/application/cli/api.py b/collectoss/application/cli/api.py index 4f7077a78..0c567c590 100644 --- a/collectoss/application/cli/api.py +++ b/collectoss/application/cli/api.py @@ -38,7 +38,7 @@ def start(ctx, development, port): """Start CollectOSS's backend server.""" try: - if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index e163ad366..3526a3c2c 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -62,7 +62,7 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGINT, manager.shutdown_signal_handler) try: - if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( @@ -145,7 +145,7 @@ def start(ctx, disable_collection, development, pidfile, port): manager.keypub = keypub if not disable_collection: - if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": orchestrator = subprocess.Popen("python keyman/Orchestrator.py".split()) # Wait for orchestrator 
startup @@ -356,10 +356,10 @@ def export_env(config): Exports your GitHub key and database credentials """ - export_file = open(SystemEnv.get('AUGUR_EXPORT_FILE') or 'collectoss_export_env.sh', 'w+') + export_file = open(SystemEnv.get('COLLECTOSS_EXPORT_FILE') or 'collectoss_export_env.sh', 'w+') export_file.write('#!/bin/bash') export_file.write('\n') - env_file = open(SystemEnv.get('AUGUR_ENV_FILE') or 'docker_env.txt', 'w+') + env_file = open(SystemEnv.get('COLLECTOSS_ENV_FILE') or 'docker_env.txt', 'w+') for env_var in config.get_env_config().items(): if "LOG" not in env_var[0]: diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index c84d81907..adf4b50e8 100644 --- a/collectoss/application/cli/collection.py +++ b/collectoss/application/cli/collection.py @@ -46,7 +46,7 @@ def start(ctx, development): """Start CollectOSS's backend server.""" try: - if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index fe2250742..e43e472aa 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -380,7 +380,7 @@ def get_api_key(ctx): short_help="Check the ~/.pgpass file for CollectOSS's database credentials", ) def check_pgpass(): - db_environment_var = SystemEnv.get("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") if db_environment_var: # gets the user, passowrd, host, port, and database_name out of environment variable # assumes database string of structure //:@:/ @@ -496,7 +496,7 @@ def run_psql_command_in_database(target_type, target): logger.error("Invalid target type. 
Exiting...") exit(1) - db_environment_var = SystemEnv.get("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") # db_json_file_location = os.getcwd() + "/db.config.json" # db_json_exists = os.path.exists(db_json_file_location) diff --git a/collectoss/application/config.py b/collectoss/application/config.py index 051235323..16f62b5ad 100644 --- a/collectoss/application/config.py +++ b/collectoss/application/config.py @@ -29,7 +29,7 @@ def get_development_flag_from_config(): return flag def get_development_flag(): - return SystemEnv.get("AUGUR_DEV") or get_development_flag_from_config() or False + return SystemEnv.get("COLLECTOSS_DEV") or get_development_flag_from_config() or False def redact_setting_value(section_name, setting_name, value): value_redacted = value if section_name != "Keys" else "REDACTED" diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py index 9db7d8866..f6d5aa465 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py @@ -46,7 +46,7 @@ def get_database_args_from_env(): - db_str = SystemEnv.get("AUGUR_DB") + db_str = SystemEnv.get("COLLECTOSS_DB") try: db_json_file_location = os.getcwd() + "/db.config.json" except FileNotFoundError: diff --git a/collectoss/tasks/init/celery_app.py b/collectoss/tasks/init/celery_app.py index a33e1e961..22fd34872 100644 --- a/collectoss/tasks/init/celery_app.py +++ b/collectoss/tasks/init/celery_app.py @@ -63,7 +63,7 @@ tasks = start_tasks + github_tasks + gitlab_tasks + git_tasks + materialized_view_tasks + frontend_tasks -if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": +if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": tasks += data_analysis_tasks redis_db_number, redis_conn_string = get_redis_conn_values() diff --git a/collectoss/tasks/start_tasks.py b/collectoss/tasks/start_tasks.py index e5939f240..51bf25cd7 100644 --- 
a/collectoss/tasks/start_tasks.py +++ b/collectoss/tasks/start_tasks.py @@ -15,7 +15,7 @@ from collectoss.tasks.github.repo_info.tasks import * from collectoss.tasks.github.releases.tasks import * from collectoss.application.environment import SystemEnv -if SystemEnv.get('AUGUR_DOCKER_DEPLOY') != "1": +if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": from collectoss.tasks.data_analysis import * from collectoss.tasks.github.detect_move.tasks import detect_github_repo_move_core, detect_github_repo_move_secondary from collectoss.tasks.github.releases.tasks import collect_releases @@ -39,7 +39,7 @@ from collectoss.application.db.lib import execute_sql, get_session from collectoss.application.config import SystemConfig -RUNNING_DOCKER = SystemEnv.get('AUGUR_DOCKER_DEPLOY') == "1" +RUNNING_DOCKER = SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') == "1" CELERY_GROUP_TYPE = type(group()) CELERY_CHAIN_TYPE = type(chain()) From 27fd27e8186ce7903b0bf271b172af60818e363d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 20 May 2026 16:16:06 -0400 Subject: [PATCH 23/77] update env var names in RST docs Signed-off-by: Adrian Edwards --- docs/source/deployment/production.rst | 16 ++++----- .../create-a-metric/api-development.rst | 6 ++-- .../create-a-metric/metrics-steps.rst | 2 +- .../workers/creating_a_new_worker.rst | 2 +- docs/source/docker/docker-compose.rst | 12 +++---- docs/source/docker/getting-started.rst | 24 ++++++------- docs/source/docker/quick-start.rst | 28 +++++++-------- .../command-line-interface/backend.rst | 36 +++++++++---------- .../command-line-interface/configure.rst | 16 ++++----- docs/source/getting-started/using-docker.rst | 16 ++++----- 10 files changed, 79 insertions(+), 79 deletions(-) diff --git a/docs/source/deployment/production.rst b/docs/source/deployment/production.rst index e65a987f1..614737256 100644 --- a/docs/source/deployment/production.rst +++ b/docs/source/deployment/production.rst @@ -11,10 +11,10 @@ Environment Variables CollectOSS 
uses several environment variables in production. Make sure to configure the ones relevant to your deployment: -- ``AUGUR_RESET_LOGS`` : Controls automatic log reset on server startup -- ``AUGUR_DB`` : PostgreSQL database connection string (used if variable not set) +- ``COLLECTOSS_RESET_LOGS`` : Controls automatic log reset on server startup +- ``COLLECTOSS_DB`` : PostgreSQL database connection string (used if variable not set) -AUGUR_RESET_LOGS +COLLECTOSS_RESET_LOGS ---------------- **Description:** @@ -27,7 +27,7 @@ boolean `True` : CollectOSS clears old logs at startup. **Environment Variable:** -AUGUR_RESET_LOGS +COLLECTOSS_RESET_LOGS **Notes:** If set to `False`, CollectOSS will not reset logs automatically. Administrators must ensure log rotation or cleanup is handled manually. @@ -36,9 +36,9 @@ If set to `False`, CollectOSS will not reset logs automatically. Administrators .. code-block:: bash - export AUGUR_RESET_LOGS=False + export COLLECTOSS_RESET_LOGS=False -AUGUR_DB +COLLECTOSS_DB -------- **Description:** @@ -48,10 +48,10 @@ Specifies the connection string for the PostgreSQL database used by CollectOSS. string **Default:** -Docker container database (if `AUGUR_DB` is not specified) +Docker container database (if `COLLECTOSS_DB` is not specified) **Environment Variable:** -AUGUR_DB +COLLECTOSS_DB Related Resources ----------------- diff --git a/docs/source/development-guide/create-a-metric/api-development.rst b/docs/source/development-guide/create-a-metric/api-development.rst index 8aea48aac..12a010465 100644 --- a/docs/source/development-guide/create-a-metric/api-development.rst +++ b/docs/source/development-guide/create-a-metric/api-development.rst @@ -11,13 +11,13 @@ JSON Metrics are here: .. code-block:: bash - $ AUGUR_HOME/collectoss/metrics + $ COLLECTOSS_HOME/collectoss/metrics Visualization Metrics are here: .. 
code-block:: bash - $ AUGUR_HOME/collectoss/routes + $ COLLECTOSS_HOME/collectoss/routes Existing metrics files (JSON Metric) "Standard Metrics": @@ -46,7 +46,7 @@ You can see that one of the imports is our standard metric import from the util .. code-block:: python - AUGUR_HOME/collectoss/routes/util.py + COLLECTOSS_HOME/collectoss/routes/util.py All "Standard Metrics" share declaration and a method signature diff --git a/docs/source/development-guide/create-a-metric/metrics-steps.rst b/docs/source/development-guide/create-a-metric/metrics-steps.rst index 5604c422b..a2fb24a02 100644 --- a/docs/source/development-guide/create-a-metric/metrics-steps.rst +++ b/docs/source/development-guide/create-a-metric/metrics-steps.rst @@ -11,7 +11,7 @@ There are many paths, but we usually follow something along these lines: 2. Sometimes, there are metrics endpoints that integrate, or visualize several metrics. 3. Determine what tables in the CollectOSS Schema contain the data we need to develop this metric 4. Construct a very basic query that does the work of joining those tables in a minimal way so we have a "baseline query." -5. Refine the query so that it takes the standard inputs for a "standard metric" if that's what type it is; alternatively, look at non-standard metrics as they are defined in ``AUGUR_HOME/collectoss/routes``, or one of the visualization metrics in ``AUGUR_HOME/collectoss/routes/contributor.py``, ``AUGUR_HOME/collectoss/routes/pull_requests.py`` or ``AUGUR_HOME/collectoss/routes/nonstandard_metrics.py``. (This step is explained in the next section.) +5. 
Refine the query so that it takes the standard inputs for a "standard metric" if that's what type it is; alternatively, look at non-standard metrics as they are defined in ``COLLECTOSS_HOME/collectoss/routes``, or one of the visualization metrics in ``COLLECTOSS_HOME/collectoss/routes/contributor.py``, ``COLLECTOSS_HOME/collectoss/routes/pull_requests.py`` or ``COLLECTOSS_HOME/collectoss/routes/nonstandard_metrics.py``. (This step is explained in the next section.) Example Query diff --git a/docs/source/development-guide/workers/creating_a_new_worker.rst b/docs/source/development-guide/workers/creating_a_new_worker.rst index 4e713c4ac..a34d73f4b 100644 --- a/docs/source/development-guide/workers/creating_a_new_worker.rst +++ b/docs/source/development-guide/workers/creating_a_new_worker.rst @@ -132,7 +132,7 @@ In the Worker block you need to add something like this: There should NOT be a comma after the final entry in each block. -ALSO, if you wanted to have those blocks installed with auger itself when you do the PR, you need to add them to the `$AUGUR_ROOT/collectoss/config.py` file. The recommended way is to set a port range not already in use and assign a random variable range with the others, like this `your_new_worker_p = randint(56500, 56999)` ... its totally ok to compress a couple other port ranges for this process. +ALSO, if you wanted to have those blocks installed with auger itself when you do the PR, you need to add them to the `$COLLECTOSS_ROOT/collectoss/config.py` file. The recommended way is to set a port range not already in use and assign a random variable range with the others, like this `your_new_worker_p = randint(56500, 56999)` ... its totally ok to compress a couple other port ranges for this process. You can copy the housekeeper block verbatim from what you added to your own `augur.config.json`. 
For the worker block, in the `config.py` it would look like this: diff --git a/docs/source/docker/docker-compose.rst b/docs/source/docker/docker-compose.rst index 5c5d16a47..96e8e1c51 100644 --- a/docs/source/docker/docker-compose.rst +++ b/docs/source/docker/docker-compose.rst @@ -27,16 +27,16 @@ This section of the documentation details how to use CollectOSS's Docker Compose .. warning:: Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure all the following environment variables are specified, keep placeholder values if you don't need some of them. - Don't specify AUGUR_DB if you want the docker database to be used. + Don't specify COLLECTOSS_DB if you want the docker database to be used. Example .env: .. code:: - AUGUR_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITHUB_USERNAME=usernameGithub - AUGUR_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITLAB_USERNAME=usernameGitlab - AUGUR_DB=yourDBString + COLLECTOSS_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITHUB_USERNAME=usernameGithub + COLLECTOSS_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITLAB_USERNAME=usernameGitlab + COLLECTOSS_DB=yourDBString diff --git a/docs/source/docker/getting-started.rst b/docs/source/docker/getting-started.rst index 0648236a5..db6822b79 100644 --- a/docs/source/docker/getting-started.rst +++ b/docs/source/docker/getting-started.rst @@ -31,14 +31,14 @@ with the following fields (don't remove any variable, keep placeholder values if .. 
code:: python - AUGUR_DB=collectoss - AUGUR_DB_USER=collectoss - AUGUR_DB_PASSWORD=password_here + COLLECTOSS_DB=collectoss + COLLECTOSS_DB_USER=collectoss + COLLECTOSS_DB_PASSWORD=password_here - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder Then run: @@ -98,11 +98,11 @@ You can provide your own ``.env`` file to pull from. The file should have the be .. code:: - AUGUR_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITHUB_USERNAME=usernameGithub - AUGUR_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITLAB_USERNAME=usernameGitlab - AUGUR_DB=yourDBString + COLLECTOSS_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITHUB_USERNAME=usernameGithub + COLLECTOSS_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITLAB_USERNAME=usernameGitlab + COLLECTOSS_DB=yourDBString Now that you've created your config file or are ready to generate it yourself, you're ready to `get going `_ . diff --git a/docs/source/docker/quick-start.rst b/docs/source/docker/quick-start.rst index 86b552ea3..c71d9dfa2 100644 --- a/docs/source/docker/quick-start.rst +++ b/docs/source/docker/quick-start.rst @@ -13,14 +13,14 @@ Before you get off to such a quick start, go ahead and .. code:: python - AUGUR_DB=collectoss - AUGUR_DB_USER=collectoss - AUGUR_DB_PASSWORD=password_here + COLLECTOSS_DB=collectoss + COLLECTOSS_DB_USER=collectoss + COLLECTOSS_DB_PASSWORD=password_here - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder 5. 
Build the container using one of the following commands: @@ -57,14 +57,14 @@ And collectoss should be up and running! .. code-block:: - AUGUR_DB=collectoss - AUGUR_DB_USER=collectoss - AUGUR_DB_PASSWORD=password_here + COLLECTOSS_DB=collectoss + COLLECTOSS_DB_USER=collectoss + COLLECTOSS_DB_PASSWORD=password_here - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder 4. Execute the code from the base directory of the CollectOSS repository: diff --git a/docs/source/getting-started/command-line-interface/backend.rst b/docs/source/getting-started/command-line-interface/backend.rst index d53fd36ae..2adcce0ef 100644 --- a/docs/source/getting-started/command-line-interface/backend.rst +++ b/docs/source/getting-started/command-line-interface/backend.rst @@ -145,29 +145,29 @@ Successful output looks like: .. 
code-block:: bash - > CLI: [util.export_env] [INFO] Exporting AUGUR_GITHUB_API_KEY - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_HOST - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_NAME - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_PORT - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_USER - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_PASSWORD + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_GITHUB_API_KEY + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_HOST + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_NAME + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_PORT + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_USER + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_PASSWORD # contents of collectoss_export_env.sh #!/bin/bash - export AUGUR_GITHUB_API_KEY="your_key_here" - export AUGUR_DB_HOST="your_host" - export AUGUR_DB_NAME="your_db_name" - export AUGUR_DB_PORT="your_db_port" - export AUGUR_DB_USER="your_db_user" - export AUGUR_DB_PASSWORD="your_db_password" + export COLLECTOSS_GITHUB_API_KEY="your_key_here" + export COLLECTOSS_DB_HOST="your_host" + export COLLECTOSS_DB_NAME="your_db_name" + export COLLECTOSS_DB_PORT="your_db_port" + export COLLECTOSS_DB_USER="your_db_user" + export COLLECTOSS_DB_PASSWORD="your_db_password" # contents of docker_env.txt - AUGUR_GITHUB_API_KEY="your_key_here" - AUGUR_DB_HOST="your_host" - AUGUR_DB_NAME="your_db_name" - AUGUR_DB_PORT="your_db_port" - AUGUR_DB_USER="your_db_user" - AUGUR_DB_PASSWORD="your_db_password" + COLLECTOSS_GITHUB_API_KEY="your_key_here" + COLLECTOSS_DB_HOST="your_host" + COLLECTOSS_DB_NAME="your_db_name" + COLLECTOSS_DB_PORT="your_db_port" + COLLECTOSS_DB_USER="your_db_user" + COLLECTOSS_DB_PASSWORD="your_db_password" ``repo-reset`` diff --git a/docs/source/getting-started/command-line-interface/configure.rst b/docs/source/getting-started/command-line-interface/configure.rst index 5659cf6ec..89350bc1a 100644 --- 
a/docs/source/getting-started/command-line-interface/configure.rst +++ b/docs/source/getting-started/command-line-interface/configure.rst @@ -12,19 +12,19 @@ The ``init`` command is used to create a configuration file, by default named `` Each of the available parameters is optional, and can also be configured using an existing environment variable. Below is the list of available parameters, their defaults, and the corresponding environment variable. ---db_name Database name for your data collection database. Defaults to ``augur``. Set by the ``AUGUR_DB_NAME`` environment variable +--db_name Database name for your data collection database. Defaults to ``augur``. Set by the ``COLLECTOSS_DB_NAME`` environment variable ---db_host Host for your data collection database. Defaults to ``localhost``. Set by the ``AUGUR_DB_HOST`` environment variable +--db_host Host for your data collection database. Defaults to ``localhost``. Set by the ``COLLECTOSS_DB_HOST`` environment variable ---db_user User for your data collection database. Defaults to ``augur``. Set by the ``AUGUR_DB_USER`` environment variable +--db_user User for your data collection database. Defaults to ``augur``. Set by the ``COLLECTOSS_DB_USER`` environment variable ---db_port Port for your data collection database. Defaults to ``5432``. Set by the ``AUGUR_DB_PORT`` environment variable +--db_port Port for your data collection database. Defaults to ``5432``. Set by the ``COLLECTOSS_DB_PORT`` environment variable ---db_password Password for your data collection database. Defaults to ``augur``. Set by the ``AUGUR_DB_PASSWORD`` environment variable +--db_password Password for your data collection database. Defaults to ``augur``. Set by the ``COLLECTOSS_DB_PASSWORD`` environment variable ---github_api_key GitHub API key for data collection from the GitHub API. Defaults to ``key``. Set by the ``AUGUR_GITHUB_API_KEY`` environment variable +--github_api_key GitHub API key for data collection from the GitHub API. 
Defaults to ``key``. Set by the ``COLLECTOSS_GITHUB_API_KEY`` environment variable ---facade_repo_directory The directory on this machine where Facade should store its cloned repos. Defaults to ``repos/``. Set by the ``AUGUR_FACADE_REPO_DIRECTORY`` environment variable +--facade_repo_directory The directory on this machine where Facade should store its cloned repos. Defaults to ``repos/``. Set by the ``COLLECTOSS_FACADE_REPO_DIRECTORY`` environment variable --rc-config-file Path to an existing CollectOSS config file whose values will be used as the defaults. Defaults to ``None``. This parameter does not support being set by an environment variable. @@ -41,7 +41,7 @@ Example usage\: $ uv run collectoss config init --db_name "db_name" --db_host "host" --db_port "port" --db_user "db_user" --db_password "password" --github_api_key "github_api_key" --facade_repo_directory "facade_repo_directory" # to generate an augur.config.json given all credentials and environment variables - $ uv run collectoss config init --db_name $AUGUR_DB_NAME --db_host $AUGUR_DB_HOST --db_port $AUGUR_DB_PORT --db_user $AUGUR_DB_DB_USER --db_password $AUGUR_DB_PASSWORD --github_api_key $AUGUR_GITHUB_API_KEY --facade_repo_directory $AUGUR_FACADE_REPO_DIRECTORY + $ uv run collectoss config init --db_name $COLLECTOSS_DB_NAME --db_host $COLLECTOSS_DB_HOST --db_port $COLLECTOSS_DB_PORT --db_user $COLLECTOSS_DB_USER --db_password $COLLECTOSS_DB_PASSWORD --github_api_key $COLLECTOSS_GITHUB_API_KEY --facade_repo_directory $COLLECTOSS_FACADE_REPO_DIRECTORY # successful output looks like: > CLI: [config.init] [INFO] Config written to /Users/carter/.collectoss/augur.config.json diff --git a/docs/source/getting-started/using-docker.rst b/docs/source/getting-started/using-docker.rst index 5028d5c3a..c427372b1 100644 --- a/docs/source/getting-started/using-docker.rst +++ b/docs/source/getting-started/using-docker.rst @@ -14,14 +14,14 @@ the following resources (or more). ..
code:: python - AUGUR_DB=augur - AUGUR_DB_USER=augur - AUGUR_DB_PASSWORD=password_here - - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_DB=augur + COLLECTOSS_DB_USER=augur + COLLECTOSS_DB_PASSWORD=password_here + + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder 3. Build the container using one of the following commands: From bef5639443e88630fd27fe937241e460b92f21ee Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 11:30:34 -0400 Subject: [PATCH 24/77] update vars in example env Signed-off-by: Adrian Edwards --- environment.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/environment.txt b/environment.txt index 42d00b9c1..3d4c4a721 100644 --- a/environment.txt +++ b/environment.txt @@ -1,12 +1,12 @@ -AUGUR_DB_HOST=collectoss -AUGUR_DB_NAME=collectoss -AUGUR_DB_USER=collectoss -AUGUR_DB_PASSWORD= +COLLECTOSS_DB_HOST=collectoss +COLLECTOSS_DB_NAME=collectoss +COLLECTOSS_DB_USER=collectoss +COLLECTOSS_DB_PASSWORD= -AUGUR_GITHUB_API_KEY= -AUGUR_GITHUB_USERNAME= -AUGUR_GITLAB_API_KEY= -AUGUR_GITLAB_USERNAME= +COLLECTOSS_GITHUB_API_KEY= +COLLECTOSS_GITHUB_USERNAME= +COLLECTOSS_GITLAB_API_KEY= +COLLECTOSS_GITLAB_USERNAME= -AUGUR_RABBITMQ_USERNAME= -AUGUR_RABBITMQ_PASSWORD= +COLLECTOSS_RABBITMQ_USERNAME= +COLLECTOSS_RABBITMQ_PASSWORD= From 6fd7ee0edd825e168b4b19e35a35e73489273bce Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 27 May 2026 11:32:34 -0400 Subject: [PATCH 25/77] update variable names in docker compose Signed-off-by: Adrian Edwards --- docker-compose.yml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 55e1127be..e1e8ed8da 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -7,11 +7,11 @@ services: restart: unless-stopped environment: - "POSTGRES_DB=augur" - - "POSTGRES_USER=${AUGUR_DB_USER:-augur}" - - "POSTGRES_PASSWORD=${AUGUR_DB_PASSWORD:-augur}" + - "POSTGRES_USER=${COLLECTOSS_DB_USER:-augur}" + - "POSTGRES_PASSWORD=${COLLECTOSS_DB_PASSWORD:-augur}" - "PGDATA=/var/lib/postgresql/data/pgdata" ports: - - "${AUGUR_DB_PORT:-5432}:5432" + - "${COLLECTOSS_DB_PORT:-5432}:5432" volumes: - augurpostgres:/var/lib/postgresql/data @@ -36,15 +36,15 @@ services: context: . dockerfile: ./docker/rabbitmq/Dockerfile args: - - RABBIT_MQ_DEFAULT_USER=${AUGUR_RABBITMQ_USERNAME:-augur} - - RABBIT_MQ_DEFAULT_PASSWORD=${AUGUR_RABBITMQ_PASSWORD:-password123} - - RABBIT_MQ_DEFAULT_VHOST=${AUGUR_RABBITMQ_VHOST:-collectoss_vhost} + - RABBIT_MQ_DEFAULT_USER=${COLLECTOSS_RABBITMQ_USERNAME:-augur} + - RABBIT_MQ_DEFAULT_PASSWORD=${COLLECTOSS_RABBITMQ_PASSWORD:-password123} + - RABBIT_MQ_DEFAULT_VHOST=${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} core: image: collectoss:latest build: context: . 
- dockerfile: ./docker/backend/${AUGUR_TARGET:-Dockerfile} + dockerfile: ./docker/backend/${COLLECTOSS_TARGET:-Dockerfile} volumes: - cache:/cache:rw - config:/config:rw @@ -56,16 +56,16 @@ services: #extra_hosts: # - "host.docker.internal:host-gateway" #Be able to ping services on the local machine environment: - - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@database:5432/augur" - - "AUGUR_DB_SCHEMA_BUILD=1" - - AUGUR_FACADE_REPO_DIRECTORY=/facade - - "AUGUR_FLAGS=$AUGUR_FLAGS" - - "AUGUR_GITHUB_API_KEY=${AUGUR_GITHUB_API_KEY}" - - "AUGUR_GITLAB_API_KEY=${AUGUR_GITLAB_API_KEY}" - - "AUGUR_GITHUB_USERNAME=${AUGUR_GITHUB_USERNAME}" - - "AUGUR_GITLAB_USERNAME=${AUGUR_GITLAB_USERNAME}" + - "COLLECTOSS_DB=postgresql+psycopg2://${COLLECTOSS_DB_USER:-augur}:${COLLECTOSS_DB_PASSWORD:-augur}@database:5432/augur" + - "COLLECTOSS_DB_SCHEMA_BUILD=1" + - COLLECTOSS_FACADE_REPO_DIRECTORY=/facade + - "COLLECTOSS_FLAGS=$COLLECTOSS_FLAGS" + - "COLLECTOSS_GITHUB_API_KEY=${COLLECTOSS_GITHUB_API_KEY}" + - "COLLECTOSS_GITLAB_API_KEY=${COLLECTOSS_GITLAB_API_KEY}" + - "COLLECTOSS_GITHUB_USERNAME=${COLLECTOSS_GITHUB_USERNAME}" + - "COLLECTOSS_GITLAB_USERNAME=${COLLECTOSS_GITLAB_USERNAME}" - REDIS_CONN_STRING=redis://redis:6379 - - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-collectoss_vhost} + - RABBITMQ_CONN_STRING=amqp://${COLLECTOSS_RABBITMQ_USERNAME:-augur}:${COLLECTOSS_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} - CONFIG_LOCATION=/config/config.yml - CONFIG_DATADIR=/config - CACHE_DATADIR=/cache @@ -92,9 +92,9 @@ services: # ports: # - 5555:5555 # environment: - # - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@database:5432/augur" + # - "COLLECTOSS_DB=postgresql+psycopg2://${COLLECTOSS_DB_USER:-augur}:${COLLECTOSS_DB_PASSWORD:-augur}@database:5432/augur" # - 
REDIS_CONN_STRING=redis://redis:6379 - # - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-collectoss_vhost} + # - RABBITMQ_CONN_STRING=amqp://${COLLECTOSS_RABBITMQ_USERNAME:-augur}:${COLLECTOSS_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} # depends_on: # - core # - database From 0aa0d8accca4ed78fc82d7411678b4a56ee82ac8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 12:55:57 -0400 Subject: [PATCH 26/77] hard change env var prefix for CLI commands Signed-off-by: Adrian Edwards --- collectoss/application/cli/_multicommand.py | 2 +- collectoss/application/cli/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/collectoss/application/cli/_multicommand.py b/collectoss/application/cli/_multicommand.py index 13186e7bb..06aae01de 100644 --- a/collectoss/application/cli/_multicommand.py +++ b/collectoss/application/cli/_multicommand.py @@ -11,7 +11,7 @@ from pathlib import Path # import collectoss.application -CONTEXT_SETTINGS = dict(auto_envvar_prefix='AUGUR') +CONTEXT_SETTINGS = dict(auto_envvar_prefix='COLLECTOSS') class CLIMultiCommand(click.MultiCommand): def __commands_folder(self): diff --git a/collectoss/application/cli/config.py b/collectoss/application/cli/config.py index 2a9a09320..9753f5299 100644 --- a/collectoss/application/cli/config.py +++ b/collectoss/application/cli/config.py @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) -ENVVAR_PREFIX = "AUGUR_" +ENVVAR_PREFIX = "COLLECTOSS_" @click.group('config', short_help='Generate an augur.config.json') @click.pass_context From 110f42b6fafd871048e2ceeb1960bfdb9d822c98 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 12:56:38 -0400 Subject: [PATCH 27/77] attempt to add transitional variables for the specific existing places where env vars are explicitly needed in the CLI Signed-off-by: Adrian Edwards --- 
collectoss/application/cli/config.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/collectoss/application/cli/config.py b/collectoss/application/cli/config.py index 9753f5299..50641439e 100644 --- a/collectoss/application/cli/config.py +++ b/collectoss/application/cli/config.py @@ -18,18 +18,21 @@ ENVVAR_PREFIX = "COLLECTOSS_" +def get_transitional_envs(name: str) -> list: + return [ENVVAR_PREFIX + name, "AUGUR_" + name] + @click.group('config', short_help='Generate an augur.config.json') @click.pass_context def cli(ctx): ctx.obj = DatabaseContext() @cli.command('init') -@click.option('--github-api-key', help="GitHub API key for data collection from the GitHub API", envvar=ENVVAR_PREFIX + 'GITHUB_API_KEY') -@click.option('--facade-repo-directory', help="Directory on the database server where Facade should clone repos", envvar=ENVVAR_PREFIX + 'FACADE_REPO_DIRECTORY') -@click.option('--gitlab-api-key', help="GitLab API key for data collection from the GitLab API", envvar=ENVVAR_PREFIX + 'GITLAB_API_KEY') -@click.option('--redis-conn-string', help="String to connect to redis cache", envvar=ENVVAR_PREFIX + 'REDIS_CONN_STRING') -@click.option('--rabbitmq-conn-string', help="String to connect to rabbitmq broker", envvar=ENVVAR_PREFIX + 'RABBITMQ_CONN_STRING') -@click.option('--logs-directory', help="Directory to store logs", envvar=ENVVAR_PREFIX + 'LOGS_DIRECTORY') +@click.option('--github-api-key', help="GitHub API key for data collection from the GitHub API", envvar=get_transitional_envs('GITHUB_API_KEY')) +@click.option('--facade-repo-directory', help="Directory on the database server where Facade should clone repos", envvar=get_transitional_envs('FACADE_REPO_DIRECTORY')) +@click.option('--gitlab-api-key', help="GitLab API key for data collection from the GitLab API", envvar=get_transitional_envs('GITLAB_API_KEY')) +@click.option('--redis-conn-string', help="String to connect to redis cache", 
envvar=get_transitional_envs('REDIS_CONN_STRING')) +@click.option('--rabbitmq-conn-string', help="String to connect to rabbitmq broker", envvar=get_transitional_envs('RABBITMQ_CONN_STRING')) +@click.option('--logs-directory', help="Directory to store logs", envvar=get_transitional_envs('LOGS_DIRECTORY')) @test_connection @test_db_connection @with_database From f3ff5a34310b1eab69ea948aedcdf63c082fa3d8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:12:30 -0400 Subject: [PATCH 28/77] factor tests into a class Signed-off-by: Adrian Edwards --- .../test_config/test_environment.py | 102 +++++++++--------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_application/test_config/test_environment.py index 6b62f2ec9..587910da7 100644 --- a/tests/test_application/test_config/test_environment.py +++ b/tests/test_application/test_config/test_environment.py @@ -6,75 +6,77 @@ prefixes = ["COLLECTOSS", "OTHER"] -def test_env_extract_prefix(): - assert extract_prefix("OTHER_DB", prefixes) == "OTHER_" - assert extract_prefix("COLLECTOSS_DB", prefixes) == "COLLECTOSS_" +class TestSystemEnv: -def test_env_extract_prefix_default(): - assert extract_prefix("SOME_DB", prefixes) is None - assert extract_prefix("THINGY_DB", prefixes) is None + def test_env_extract_prefix(self): + assert extract_prefix("OTHER_DB", prefixes) == "OTHER_" + assert extract_prefix("COLLECTOSS_DB", prefixes) == "COLLECTOSS_" + def test_env_extract_prefix_default(self): + assert extract_prefix("SOME_DB", prefixes) is None + assert extract_prefix("THINGY_DB", prefixes) is None -def test_env_extract_prefix_unprefixed(): - assert extract_prefix("DB", prefixes) is None -def test_fetching_env(): - # plain - os.environ["COLLECTOSS_NAME"] = "A" - assert SystemEnv.get("COLLECTOSS_NAME") == "A" + def test_env_extract_prefix_unprefixed(self): + assert extract_prefix("DB", prefixes) is None - # fallback handling - 
os.environ["OTHER_THING"] = "B" - assert SystemEnv.get("COLLECTOSS_THING", None, prefixes) == "B" + def test_fetching_env(self): + # plain + os.environ["COLLECTOSS_NAME"] = "A" + assert SystemEnv.get("COLLECTOSS_NAME") == "A" - # cleanup - del os.environ["COLLECTOSS_NAME"] - del os.environ["OTHER_THING"] + # fallback handling + os.environ["OTHER_THING"] = "B" + assert SystemEnv.get("COLLECTOSS_THING", None, prefixes) == "B" -def test_fetching_env_backwards(): - os.environ["COLLECTOSS_NAME"] = "A" - assert SystemEnv.get("OTHER_NAME", None, prefixes) == "A" + # cleanup + del os.environ["COLLECTOSS_NAME"] + del os.environ["OTHER_THING"] - # cleanup - del os.environ["COLLECTOSS_NAME"] + def test_fetching_env_backwards(self): + os.environ["COLLECTOSS_NAME"] = "A" + assert SystemEnv.get("OTHER_NAME", None, prefixes) == "A" -def test_fetching_env_no_value(): - assert SystemEnv.get("COLLECTOSS_MISSING", None, prefixes) is None + # cleanup + del os.environ["COLLECTOSS_NAME"] -def test_fetching_env_default(): - assert SystemEnv.get("COLLECTOSS_DEFAULT", "SOME", prefixes) == "SOME" + def test_fetching_env_no_value(self): + assert SystemEnv.get("COLLECTOSS_MISSING", None, prefixes) is None -def test_no_known_prefix(): - # fallback handling - os.environ["THING"] = "C" - assert SystemEnv.get("THING", None, prefixes) == "C" + def test_fetching_env_default(self): + assert SystemEnv.get("COLLECTOSS_DEFAULT", "SOME", prefixes) == "SOME" + def test_no_known_prefix(self): + # fallback handling + os.environ["THING"] = "C" + assert SystemEnv.get("THING", None, prefixes) == "C" -def test_get_bool_trues(): - cases = ["1", "true", "True", "TRUE", "y", "Y", "yes", "Yes"] + def test_get_bool_trues(self): - for case in cases: - os.environ["OTHER_BOOL"] = case - assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == True - del os.environ["OTHER_BOOL"] + cases = ["1", "true", "True", "TRUE", "y", "Y", "yes", "Yes"] -def test_get_bool_falses(): + for case in cases: + 
os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == True + del os.environ["OTHER_BOOL"] - cases = ["0", "false", "False", "FALSE", "n", "N", "no", "No"] + def test_get_bool_falses(self): - for case in cases: - os.environ["OTHER_BOOL"] = case - assert SystemEnv.get_bool("OTHER_BOOL", True, prefixes) == False - del os.environ["OTHER_BOOL"] + cases = ["0", "false", "False", "FALSE", "n", "N", "no", "No"] -def test_get_bool_default(): + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", True, prefixes) == False + del os.environ["OTHER_BOOL"] - cases = ["?", "maybe", "Stuff", "333"] + def test_get_bool_default(self): - for case in cases: - os.environ["OTHER_BOOL"] = case - assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == False - del os.environ["OTHER_BOOL"] + cases = ["?", "maybe", "Stuff", "333"] - + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == False + del os.environ["OTHER_BOOL"] + + From 75345cba1c31be4115b0ab62150272bcd242a5dd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:12:42 -0400 Subject: [PATCH 29/77] import SystemEnv into celery_app Signed-off-by: Adrian Edwards --- collectoss/tasks/init/celery_app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/collectoss/tasks/init/celery_app.py b/collectoss/tasks/init/celery_app.py index 22fd34872..4b10af18a 100644 --- a/collectoss/tasks/init/celery_app.py +++ b/collectoss/tasks/init/celery_app.py @@ -17,6 +17,7 @@ from collectoss.application.db import get_engine from collectoss.application.db.lib import get_session from collectoss.application.config import SystemConfig +from collectoss.application.environment import SystemEnv from collectoss.tasks.init import get_redis_conn_values, get_rabbitmq_conn_string from collectoss.application.db.models import Repo from collectoss.tasks.util.collection_state import CollectionState From 
bcff419815881cf65cde36554f046c529a16c949 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:17:51 -0400 Subject: [PATCH 30/77] move test next to the actual known good config tests Signed-off-by: Adrian Edwards --- .../test_config => test_classes}/test_environment.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_application/test_config => test_classes}/test_environment.py (100%) diff --git a/tests/test_application/test_config/test_environment.py b/tests/test_classes/test_environment.py similarity index 100% rename from tests/test_application/test_config/test_environment.py rename to tests/test_classes/test_environment.py From 3351859a90c0f51a116155a9029f43a27f687476 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:26:42 -0400 Subject: [PATCH 31/77] add more detailed failure reasons to get_bool tests Signed-off-by: Adrian Edwards --- tests/test_classes/test_environment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_classes/test_environment.py b/tests/test_classes/test_environment.py index 587910da7..38c0a99e2 100644 --- a/tests/test_classes/test_environment.py +++ b/tests/test_classes/test_environment.py @@ -58,7 +58,7 @@ def test_get_bool_trues(self): for case in cases: os.environ["OTHER_BOOL"] = case - assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == True + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == True, f"value '{case}' should resolve to True" del os.environ["OTHER_BOOL"] def test_get_bool_falses(self): @@ -67,7 +67,7 @@ def test_get_bool_falses(self): for case in cases: os.environ["OTHER_BOOL"] = case - assert SystemEnv.get_bool("OTHER_BOOL", True, prefixes) == False + assert SystemEnv.get_bool("OTHER_BOOL", True, prefixes) == False, f"value '{case}' should resolve to False" del os.environ["OTHER_BOOL"] def test_get_bool_default(self): @@ -76,7 +76,7 @@ def test_get_bool_default(self): for case in cases: os.environ["OTHER_BOOL"] 
= case - assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == False + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == False, f"value '{case}' should resolve to Default value" del os.environ["OTHER_BOOL"] From 3585e5062ba76d3e28b5e76d13ab8852db4c2182 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:29:24 -0400 Subject: [PATCH 32/77] split environment tests into two classes Signed-off-by: Adrian Edwards --- tests/test_classes/test_environment.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_classes/test_environment.py b/tests/test_classes/test_environment.py index 38c0a99e2..e6621062a 100644 --- a/tests/test_classes/test_environment.py +++ b/tests/test_classes/test_environment.py @@ -6,8 +6,7 @@ prefixes = ["COLLECTOSS", "OTHER"] -class TestSystemEnv: - +class TestExtractPrefix: def test_env_extract_prefix(self): assert extract_prefix("OTHER_DB", prefixes) == "OTHER_" assert extract_prefix("COLLECTOSS_DB", prefixes) == "COLLECTOSS_" @@ -20,6 +19,8 @@ def test_env_extract_prefix_default(self): def test_env_extract_prefix_unprefixed(self): assert extract_prefix("DB", prefixes) is None +class TestSystemEnv: + def test_fetching_env(self): # plain os.environ["COLLECTOSS_NAME"] = "A" From 19984f247d74c5ed7b3589916d139010cbb241e2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:46:37 -0400 Subject: [PATCH 33/77] apply homedir resolution fix to SCC path as well Signed-off-by: Adrian Edwards --- collectoss/tasks/git/scc_value_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/tasks/git/scc_value_tasks/core.py b/collectoss/tasks/git/scc_value_tasks/core.py index a526af990..770165522 100644 --- a/collectoss/tasks/git/scc_value_tasks/core.py +++ b/collectoss/tasks/git/scc_value_tasks/core.py @@ -21,7 +21,7 @@ def value_model(logger,repo_git): logger.info(f"Repo ID: {repo_id}, Path: {path}") logger.info('Running scc...') - path_to_scc = 
SystemEnv.get('SCC_DIR', (SystemEnv.get('HOME') or "~") + '/scc') + path_to_scc = SystemEnv.get('SCC_DIR', os.path.expanduser('~/scc')) required_output = parse_json_from_subprocess_call(logger,['./scc', '-f','json','--by-file', path], cwd=path_to_scc) From c2b6215410eb5a7d88e322fa20de6f422136a760 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:51:58 -0400 Subject: [PATCH 34/77] fix docs underline lengths Signed-off-by: Adrian Edwards --- docs/source/deployment/production.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/deployment/production.rst b/docs/source/deployment/production.rst index 614737256..186a38c4c 100644 --- a/docs/source/deployment/production.rst +++ b/docs/source/deployment/production.rst @@ -15,7 +15,7 @@ to your deployment: - ``COLLECTOSS_DB`` : PostgreSQL database connection string (used if variable not set) COLLECTOSS_RESET_LOGS ----------------- +--------------------- **Description:** Controls whether CollectOSS resets its log files every time the server starts. Useful for managing log size or integrating with external log rotation systems. @@ -39,7 +39,7 @@ If set to `False`, CollectOSS will not reset logs automatically. Administrators export COLLECTOSS_RESET_LOGS=False COLLECTOSS_DB --------- +------------- **Description:** Specifies the connection string for the PostgreSQL database used by CollectOSS. If omitted, the default Docker database is used. 
From 97d443c716b35e9f82663ef275e8cbf5502aac9b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 17:59:04 -0400 Subject: [PATCH 35/77] remove unused imports Signed-off-by: Adrian Edwards --- collectoss/api/view/init.py | 1 - collectoss/tasks/git/dependency_tasks/core.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/collectoss/api/view/init.py b/collectoss/api/view/init.py index 1ab68912c..b26752af9 100644 --- a/collectoss/api/view/init.py +++ b/collectoss/api/view/init.py @@ -1,4 +1,3 @@ -import os from pathlib import Path from collectoss.application.logs import SystemLogger import secrets, yaml diff --git a/collectoss/tasks/git/dependency_tasks/core.py b/collectoss/tasks/git/dependency_tasks/core.py index 3bd2aaab2..0648231b0 100644 --- a/collectoss/tasks/git/dependency_tasks/core.py +++ b/collectoss/tasks/git/dependency_tasks/core.py @@ -1,7 +1,7 @@ from datetime import datetime import os from collectoss.application.db.models import * -from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value, get_session +from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value from collectoss.application.environment import SystemEnv from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from collectoss.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc From e949c77a8538ffd6c04b54d32e30db2e87541680 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 29 May 2026 18:29:44 -0400 Subject: [PATCH 36/77] use SystemEnv for fetching database variable Signed-off-by: Adrian Edwards --- collectoss/application/db/engine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/collectoss/application/db/engine.py b/collectoss/application/db/engine.py index e00c3c992..5aae4466e 100644 --- a/collectoss/application/db/engine.py +++ b/collectoss/application/db/engine.py @@ -7,6 +7,7 @@ from sqlalchemy import 
create_engine, event from sqlalchemy.engine import Engine +from collectoss.application.environment import SystemEnv from collectoss.application.db.util import catch_operational_error @@ -61,7 +62,7 @@ def get_database_string() -> str: postgres database string """ - db_environment_var = os.getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") try: current_dir = os.getcwd() From 0b46bfbbb57cc152b878674be20b558cc3522580 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 08:55:44 -0400 Subject: [PATCH 37/77] update error messages surrounding DB access to refer to the new variable Signed-off-by: Adrian Edwards --- collectoss/application/cli/__init__.py | 2 +- collectoss/application/db/engine.py | 2 +- collectoss/tasks/git/util/facade_worker/facade_worker/config.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/collectoss/application/cli/__init__.py b/collectoss/application/cli/__init__.py index 18fac2f0a..444473016 100644 --- a/collectoss/application/cli/__init__.py +++ b/collectoss/application/cli/__init__.py @@ -71,7 +71,7 @@ def new_func(ctx, *args, **kwargs): # determine the location to print in error string if db_environment_var: - location = f"the AUGUR_DB environment variable\nAUGUR_DB={SystemEnv.get('COLLECTOSS_DB')}" + location = f"the COLLECTOSS_DB environment variable\nCOLLECTOSS_DB={SystemEnv.get('COLLECTOSS_DB')}" else: with open("db.config.json", 'r') as f: db_config = json.load(f) diff --git a/collectoss/application/db/engine.py b/collectoss/application/db/engine.py index 5aae4466e..884d5a61c 100644 --- a/collectoss/application/db/engine.py +++ b/collectoss/application/db/engine.py @@ -75,7 +75,7 @@ def get_database_string() -> str: if not db_environment_var and not db_json_exists: - print("ERROR no way to get connection to the database. 
\n\t\t\t\t\t\t There is no db.config.json and the AUGUR_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the AUGUR_DB environment then run make install") + print("ERROR no way to get connection to the database. \n\t\t\t\t\t\t There is no db.config.json and the COLLECTOSS_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the COLLECTOSS_DB environment then run make install") sys.exit() if db_environment_var: diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py index f6d5aa465..2b536a3a4 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py @@ -57,7 +57,7 @@ def get_database_args_from_env(): if not db_str and not db_json_exists: - logger.error("ERROR no way to get connection to the database. \n\t\t\t\t\t\t There is no db.config.json and the AUGUR_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the AUGUR_DB environment then run make install") + logger.error("ERROR no way to get connection to the database. 
\n\t\t\t\t\t\t There is no db.config.json and the COLLECTOSS_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the COLLECTOSS_DB environment then run make install") sys.exit() credentials = {} From 0b3aa2b3e460b1594d27f36cb8048f317cd0b3e0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 18:02:00 -0400 Subject: [PATCH 38/77] allow SystemEnv to set a default value Signed-off-by: Adrian Edwards --- collectoss/api/server.py | 5 +++-- collectoss/application/environment.py | 9 ++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/collectoss/api/server.py b/collectoss/api/server.py index a4d212f58..7955cd7a1 100644 --- a/collectoss/api/server.py +++ b/collectoss/api/server.py @@ -27,6 +27,7 @@ from collectoss.application.logs import SystemLogger from collectoss.application.db.session import DatabaseSession from collectoss.application.config import SystemConfig +from collectoss.application.environment import SystemEnv from collectoss.application.db.engine import get_database_string, create_database_engine from collectoss.application.db.models import Repo, Issue, PullRequest, Message, PullRequestReview, Commit, IssueAssignee, PullRequestAssignee, PullRequestCommit, PullRequestFile, Contributor, IssueLabel, PullRequestLabel, ContributorsAlias, Release, ClientApplication @@ -300,8 +301,8 @@ def create_cache_manager() -> CacheManager: cache_config = { 'cache.type': 'file', # Allow setting cache directories via environment variables - 'cache.data_dir': Path(env.setdefault("CACHE_DATADIR", 'runtime/cache/')), - 'cache.lock_dir': Path(env.setdefault("CACHE_LOCKDIR", 'runtime/cache/')), + 'cache.data_dir': Path(SystemEnv.set_default("CACHE_DATADIR", 'runtime/cache/')), + 'cache.lock_dir': Path(SystemEnv.set_default("CACHE_LOCKDIR", 'runtime/cache/')), } if not os.path.exists(cache_config['cache.data_dir']): diff --git a/collectoss/application/environment.py b/collectoss/application/environment.py index 
22a8c95d4..eee8942ed 100644 --- a/collectoss/application/environment.py +++ b/collectoss/application/environment.py @@ -76,4 +76,11 @@ def set(cls, key: str, value: str, overwrite=True) -> None: if os.getenv(key) is not None and not overwrite: return - os.environ[key] = value \ No newline at end of file + os.environ[key] = value + + @classmethod + def set_default(cls, key: str, value: str) -> None: + if cls.get(key) is None: + cls.set(key, value) + return value + return cls.get(key) \ No newline at end of file From 4a30c75451cc5db1469c97999b65d5b76764afda Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 09:32:19 -0400 Subject: [PATCH 39/77] create a stub of a helper function to check and init the schema Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 3 +++ collectoss/application/cli/db.py | 4 +++- collectoss/util/startup.py | 12 ++++++++++++ docker/backend/init.sh | 4 ---- 4 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 collectoss/util/startup.py diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 3526a3c2c..bb180433b 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,6 +30,7 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s +from collectoss.util.startup import check_init_schema from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -61,6 +62,8 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGTERM, manager.shutdown_signal_handler) signal.signal(signal.SIGINT, manager.shutdown_signal_handler) + check_init_schema() + try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index e43e472aa..1827079ec 100644 --- 
a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -29,6 +29,7 @@ process_repo_group_csv, ) from collectoss.application.environment import SystemEnv +from collectoss.util.startup import check_init_schema logger = logging.getLogger(__name__) @@ -311,7 +312,8 @@ def create_schema(): """ Create schema in the configured database """ - check_call(["alembic", "upgrade", "head"]) + # check_call(["alembic", "upgrade", "head"]) + check_init_schema() def generate_key(length): diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py new file mode 100644 index 000000000..eb9fb01c5 --- /dev/null +++ b/collectoss/util/startup.py @@ -0,0 +1,12 @@ +## Startup helpers + + +def check_init_schema(): + """Initialize the CollectOSS database schema as appropriate + """ + + pass + # does public.alembic_version exist? + # if yes, do nothing + # if no, do a sanity check to make sure the other schemas dont exist, + # then init the current db with sqlalchemy and stamp the current version with alembic diff --git a/docker/backend/init.sh b/docker/backend/init.sh index 782b8fa53..65470a403 100644 --- a/docker/backend/init.sh +++ b/docker/backend/init.sh @@ -2,10 +2,6 @@ #SPDX-License-Identifier: MIT set -e -if [[ "$AUGUR_DB_SCHEMA_BUILD" == "1" ]]; then - collectoss db create-schema -fi - if [ ! 
-v AUGUR_NO_CONFIG ]; then ./scripts/docker/config.sh docker From 4cf94bba4d66ef85794f03638ae4a0fd79d02809 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 09:40:54 -0400 Subject: [PATCH 40/77] add stub for making sure the schema is updated this was split into a separate thing Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 3 ++- collectoss/application/cli/db.py | 4 ++-- collectoss/util/startup.py | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index bb180433b..15b162ced 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,7 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema +from collectoss.util.startup import check_init_schema, check_update_schema from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -63,6 +63,7 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGINT, manager.shutdown_signal_handler) check_init_schema() + check_update_schema() try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index 1827079ec..7b6bc7c09 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -29,7 +29,7 @@ process_repo_group_csv, ) from collectoss.application.environment import SystemEnv -from collectoss.util.startup import check_init_schema +from collectoss.util.startup import check_init_schema, check_update_schema logger = logging.getLogger(__name__) @@ -292,7 +292,7 @@ def upgrade_db_version(): """ Upgrade the configured database to the latest version """ - check_call(["alembic", "upgrade", "head"]) + check_update_schema() 
@cli.command("check-for-upgrade") diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index eb9fb01c5..359054ab2 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -10,3 +10,11 @@ def check_init_schema(): # if yes, do nothing # if no, do a sanity check to make sure the other schemas dont exist, # then init the current db with sqlalchemy and stamp the current version with alembic + +def check_update_schema(): + """ensure the CollectOSS schema is on the latest version + """ + pass + # alembic upgrade head, unless theres an env var preventing automatic migration + # check_call(["alembic", "upgrade", "head"]) + From 71e74b7b8294e515e4b5557cc245df9349847907 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 11:26:33 -0400 Subject: [PATCH 41/77] move some entrypoint behavior into a variable collection helper Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 6 +++++- collectoss/util/startup.py | 29 +++++++++++++++++++++++++++ docker/backend/entrypoint.sh | 16 --------------- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 15b162ced..af3e2825d 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,7 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema, check_update_schema +from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -62,6 +62,10 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGTERM, manager.shutdown_signal_handler) signal.signal(signal.SIGINT, manager.shutdown_signal_handler) + + 
collect_env_variables(logger) + + check_init_schema() check_update_schema() diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 359054ab2..bda69c00e 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -1,6 +1,9 @@ ## Startup helpers +from collectoss.application.environment import SystemEnv + + def check_init_schema(): """Initialize the CollectOSS database schema as appropriate """ @@ -18,3 +21,29 @@ def check_update_schema(): # alembic upgrade head, unless theres an env var preventing automatic migration # check_call(["alembic", "upgrade", "head"]) +def collect_env_variables(logger): + """convenience helper for assembling more complex environment variables out of smaller ones + and other environment variable convenience operations + """ + + if SystemEnv.get("COLLECTOSS_DB") is None: + names = ["COLLECTOSS_DB_HOST", "COLLECTOSS_DB_USER", "COLLECTOSS_DB_PASSWORD", "COLLECTOSS_DB_NAME"] + values = [SystemEnv.get(n) for n in names] + + if all(map(lambda p: p is not None, values)): + host, user, passwd, name = values + SystemEnv.set("COLLECTOSS_DB", f"postgresql+psycopg2://{user}:{passwd}@{host}/{name}") + else: + logger.warning("CollectOSS was unable to create your database connection string automatically") + logger.warning("The following environment variables are missing:") + for n, v in zip(names, values): + if v is None: + logger.warning(n) + + + + db_string = SystemEnv.get("COLLECTOSS_DB") + if db_string and "localhost" in db_string: + SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) + elif db_string and "127.0.0.1" in db_string: + SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh index 78eda49e2..37d5095d5 100644 --- a/docker/backend/entrypoint.sh +++ b/docker/backend/entrypoint.sh @@ -2,22 +2,6 @@ #SPDX-License-Identifier: MIT set -e -if [[ -z "$AUGUR_DB" ]]; then - # If 
AUGUR_DB is not set, check for individual environment variables and construct AUGUR_DB connection string - if [[ -n "$AUGUR_DB_HOST" ]] && [[ -n "$AUGUR_DB_USER" ]] && [[ -n "$AUGUR_DB_PASSWORD" ]] && [[ -n "$AUGUR_DB_NAME" ]]; then - export AUGUR_DB="postgresql+psycopg2://${AUGUR_DB_USER}:${AUGUR_DB_PASSWORD}@${AUGUR_DB_HOST}/${AUGUR_DB_NAME}" - fi -fi - - -if [[ "$AUGUR_DB" == *"localhost"* ]]; then - echo "localhost db connection" - export AUGUR_DB="${AUGUR_DB/localhost/host.docker.internal}" -elif [[ "$AUGUR_DB" == *"127.0.0.1"* ]]; then - echo "localhost db connection" - export AUGUR_DB="${AUGUR_DB/127.0.0.1/host.docker.internal}" -fi - export AUGUR_FACADE_REPO_DIRECTORY=${AUGUR_FACADE_REPO_DIRECTORY:-/collectoss/facade/} export AUGUR_DOCKER_DEPLOY="1" From 009e09256dc124e3a4f8767f8c4c63eb06b2458d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 11:27:14 -0400 Subject: [PATCH 42/77] Automatically populate gitlab data with nonfunctional values if they are not specified Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 7 +++++++ scripts/docker/config.sh | 42 -------------------------------------- 2 files changed, 7 insertions(+), 42 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index bda69c00e..44f7fff35 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -47,3 +47,10 @@ def collect_env_variables(logger): SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) elif db_string and "127.0.0.1" in db_string: SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) + + # if user didnt specify gitlab credentials, just inject fake ones so we can start up. 
+ if SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") is None: + SystemEnv.set("COLLECTOSS_GITLAB_API_KEY", "fake") + if SystemEnv.get("COLLECTOSS_GITLAB_USERNAME") is None: + SystemEnv.set("COLLECTOSS_GITLAB_USERNAME", "fake") + diff --git a/scripts/docker/config.sh b/scripts/docker/config.sh index 6f92c9a36..5a279d7da 100755 --- a/scripts/docker/config.sh +++ b/scripts/docker/config.sh @@ -50,26 +50,6 @@ function get_github_api_key() { echo } -function get_gitlab_username() { - echo - echo "Please provide your username for GitLab." - echo "** This is required for CollectOSS to clone GitLab repos ***" - read -p "GitLab username: " gitlab_username - blank_confirm gitlab_username - echo -} - -function get_gitlab_api_key() { - echo - echo "Please provide a valid GitLab API key." - echo "For more information on how to create the key, visit:" - echo "https://docs.collectoss.org/en/latest/getting-started/collecting-data.html" - echo "** This is required for CollectOSS to gather data ***" - read -p "GitLab API Key: " gitlab_api_key - blank_confirm gitlab_api_key - echo -} - function get_facade_repo_path() { echo "The Facade data collection worker will clone repositories to this machine to run its analysis." 
@@ -165,28 +145,6 @@ function create_config() { echo fi - if [[ -z "${AUGUR_GITLAB_API_KEY}" ]]; then - get_gitlab_api_key - else - echo - echo "Found AUGUR_GITLAB_API_KEY environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITLAB_API_KEY if you would like to be prompted for a gitlab api key" - gitlab_api_key=$AUGUR_GITLAB_API_KEY - echo - fi - - if [[ -z "${AUGUR_GITLAB_USERNAME}" ]]; then - get_gitlab_username - else - echo - echo "Found AUGUR_GITLAB_USERNAME environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITLAB_USERNAME if you would like to be prompted for a gitlab username" - gitlab_username=$AUGUR_GITLAB_USERNAME - echo - fi - if [[ -z "${AUGUR_FACADE_REPO_DIRECTORY}" ]]; then get_facade_repo_path else From c9924ecb013db7ba4bac1fccb42bf43e7e9bb6cb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 11:27:59 -0400 Subject: [PATCH 43/77] move docker deploy variable to the dockerfile/composefile Signed-off-by: Adrian Edwards --- docker-compose.yml | 1 + docker/backend/Dockerfile | 2 ++ docker/backend/entrypoint.sh | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index e1e8ed8da..5673fd5e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,6 +64,7 @@ services: - "COLLECTOSS_GITLAB_API_KEY=${COLLECTOSS_GITLAB_API_KEY}" - "COLLECTOSS_GITHUB_USERNAME=${COLLECTOSS_GITHUB_USERNAME}" - "COLLECTOSS_GITLAB_USERNAME=${COLLECTOSS_GITLAB_USERNAME}" + - COLLECTOSS_DOCKER_DEPLOY=1 - REDIS_CONN_STRING=redis://redis:6379 - RABBITMQ_CONN_STRING=amqp://${COLLECTOSS_RABBITMQ_USERNAME:-augur}:${COLLECTOSS_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} - CONFIG_LOCATION=/config/config.yml diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index d3ada5bf0..bcef5b22e 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -33,6 +33,8 @@ LABEL 
org.opencontainers.image.revision=${REVISION} ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" +ENV COLLECTOSS_DOCKER_DEPLOY="1" + RUN set -x \ && apt-get update \ && apt-get -y install --no-install-recommends \ diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh index 37d5095d5..fc51cae87 100644 --- a/docker/backend/entrypoint.sh +++ b/docker/backend/entrypoint.sh @@ -3,7 +3,6 @@ set -e export AUGUR_FACADE_REPO_DIRECTORY=${AUGUR_FACADE_REPO_DIRECTORY:-/collectoss/facade/} -export AUGUR_DOCKER_DEPLOY="1" #Deal with special case where 'localhost' is the machine that started the container if [[ "$REDIS_CONN_STRING" == *"localhost"* ]] || [[ "$REDIS_CONN_STRING" == *"127.0.0.1"* ]]; then From 32033a0d9a307ce11ba4381b0c7b24f5d1ab06ee Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 12:55:22 -0400 Subject: [PATCH 44/77] provide default facade repo directory Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 4 ++++ docker/backend/entrypoint.sh | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 44f7fff35..4c5262580 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -54,3 +54,7 @@ def collect_env_variables(logger): if SystemEnv.get("COLLECTOSS_GITLAB_USERNAME") is None: SystemEnv.set("COLLECTOSS_GITLAB_USERNAME", "fake") + # provide a default value for the facade repo directory (assumes docker paths) + facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + if facade_repo_directory is None: + SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", "/collectoss/facade/") diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh index fc51cae87..b1a45d851 100644 --- a/docker/backend/entrypoint.sh +++ b/docker/backend/entrypoint.sh @@ -2,8 +2,6 @@ #SPDX-License-Identifier: MIT set -e -export 
AUGUR_FACADE_REPO_DIRECTORY=${AUGUR_FACADE_REPO_DIRECTORY:-/collectoss/facade/} - #Deal with special case where 'localhost' is the machine that started the container if [[ "$REDIS_CONN_STRING" == *"localhost"* ]] || [[ "$REDIS_CONN_STRING" == *"127.0.0.1"* ]]; then echo "localhost redis connection" From 5fc3eccf8cce50f75a8245ac16975d576946afaa Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 12:56:40 -0400 Subject: [PATCH 45/77] ensure facade repo directory is resolveable/absolute Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 4 ++++ scripts/docker/config.sh | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 4c5262580..6f6d1ec35 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -1,6 +1,7 @@ ## Startup helpers +from pathlib import Path from collectoss.application.environment import SystemEnv @@ -58,3 +59,6 @@ def collect_env_variables(logger): facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") if facade_repo_directory is None: SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", "/collectoss/facade/") + else: + # Check if the path is resolveable/make it absolute + SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", str(Path(facade_repo_directory).resolve(strict=True))) diff --git a/scripts/docker/config.sh b/scripts/docker/config.sh index 5a279d7da..2dea09de4 100755 --- a/scripts/docker/config.sh +++ b/scripts/docker/config.sh @@ -57,11 +57,7 @@ function get_facade_repo_path() { echo while true; do - read -e -p "Facade worker directory: " facade_repo_directory - blank_confirm facade_repo_directory - - facade_repo_directory=$(realpath $facade_repo_directory) - echo + # if ! 
[ -w $facade_repo_directory/.git-credentials ]; then # echo "User $(whoami) does not have permission to write to that location" From 4747b8ca8ea4c9ab2a2423f3520784bbd36418a5 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 12:57:20 -0400 Subject: [PATCH 46/77] ensure facade repo directory has a trailing slash Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 6 ++++++ scripts/docker/config.sh | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 6f6d1ec35..409fa117b 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -62,3 +62,9 @@ def collect_env_variables(logger): else: # Check if the path is resolveable/make it absolute SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", str(Path(facade_repo_directory).resolve(strict=True))) + + # ensure trailing slash is present + facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + if facade_repo_directory and not facade_repo_directory.endswith("/"): + facade_repo_directory += "/" + SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", facade_repo_directory) diff --git a/scripts/docker/config.sh b/scripts/docker/config.sh index 2dea09de4..c0e752d02 100755 --- a/scripts/docker/config.sh +++ b/scripts/docker/config.sh @@ -104,8 +104,6 @@ function get_facade_repo_path() { esac fi done - - [[ "${facade_repo_directory}" != */ ]] && facade_repo_directory="${facade_repo_directory}/" } function get_rabbitmq_broker_url() { From 1d2910baf8e2847ec398938a58fd76527ca370a8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 12:58:04 -0400 Subject: [PATCH 47/77] oops Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 409fa117b..dfeb8e186 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -45,7 +45,7 @@ def 
collect_env_variables(logger): db_string = SystemEnv.get("COLLECTOSS_DB") if db_string and "localhost" in db_string: - SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) + SystemEnv.set("COLLECTOSS_DB", db_string.replace("localhost", "host.docker.internal")) elif db_string and "127.0.0.1" in db_string: SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) From ba802c289112c5a35af842df02ad7df6311e342d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 13:00:44 -0400 Subject: [PATCH 48/77] handle swapping localhosts for redis connection string too Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 7 +++++++ docker/backend/entrypoint.sh | 8 -------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index dfeb8e186..fee1d8e1a 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -49,6 +49,13 @@ def collect_env_variables(logger): elif db_string and "127.0.0.1" in db_string: SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) + redis_string = SystemEnv.get("REDIS_CONN_STRING") + if redis_string and "localhost" in redis_string: + SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("localhost", "host.docker.internal")) + elif redis_string and "127.0.0.1" in redis_string: + SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("127.0.0.1", "host.docker.internal")) + + # if user didnt specify gitlab credentials, just inject fake ones so we can start up. 
if SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") is None: SystemEnv.set("COLLECTOSS_GITLAB_API_KEY", "fake") diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh index b1a45d851..5a6c63fa0 100644 --- a/docker/backend/entrypoint.sh +++ b/docker/backend/entrypoint.sh @@ -2,12 +2,4 @@ #SPDX-License-Identifier: MIT set -e -#Deal with special case where 'localhost' is the machine that started the container -if [[ "$REDIS_CONN_STRING" == *"localhost"* ]] || [[ "$REDIS_CONN_STRING" == *"127.0.0.1"* ]]; then - echo "localhost redis connection" - export redis_conn_string="redis://host.docker.internal:6379" -else - export redis_conn_string=$REDIS_CONN_STRING -fi - exec "$@" From 7e5fc1881c3094fe9423c170ff6d70cb19e2f4b6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 13:01:19 -0400 Subject: [PATCH 49/77] entrypoint is now empty Signed-off-by: Adrian Edwards --- docker/backend/Dockerfile | 1 - docker/backend/entrypoint.sh | 5 ----- 2 files changed, 6 deletions(-) delete mode 100644 docker/backend/entrypoint.sh diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index bcef5b22e..e45559b66 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -117,5 +117,4 @@ RUN ln -s /cache /collectoss/collectoss/static/cache COPY --chmod=755 ./docker/backend/entrypoint.sh / COPY --chmod=755 ./docker/backend/init.sh / RUN chmod +x /entrypoint.sh /init.sh -ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] CMD ["/init.sh"] diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh deleted file mode 100644 index 5a6c63fa0..000000000 --- a/docker/backend/entrypoint.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -#SPDX-License-Identifier: MIT -set -e - -exec "$@" From e4c13ef231404aba232e6aaa90691b9403e7b9ef Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 13:29:16 -0400 Subject: [PATCH 50/77] provide excessive user feedback with verbose logging Signed-off-by: Adrian Edwards --- 
collectoss/util/startup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index fee1d8e1a..ab2eb5cc9 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -33,6 +33,7 @@ def collect_env_variables(logger): if all(map(lambda p: p is not None, values)): host, user, passwd, name = values + logger.verbose(f"Assembling COLLECTOSS_DB string from provided variables") SystemEnv.set("COLLECTOSS_DB", f"postgresql+psycopg2://{user}:{passwd}@{host}/{name}") else: logger.warning("CollectOSS was unable to create your database connection string automatically") @@ -45,29 +46,37 @@ def collect_env_variables(logger): db_string = SystemEnv.get("COLLECTOSS_DB") if db_string and "localhost" in db_string: + logger.verbose(f"Swapping localhost in COLLECTOSS_DB string with docker host gateway name") SystemEnv.set("COLLECTOSS_DB", db_string.replace("localhost", "host.docker.internal")) elif db_string and "127.0.0.1" in db_string: + logger.verbose(f"Swapping 127.0.0.1 in COLLECTOSS_DB string with docker host gateway name") SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) redis_string = SystemEnv.get("REDIS_CONN_STRING") if redis_string and "localhost" in redis_string: + logger.verbose(f"Swapping localhost in REDIS_CONN_STRING with docker host gateway name") SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("localhost", "host.docker.internal")) elif redis_string and "127.0.0.1" in redis_string: + logger.verbose(f"Swapping 127.0.0.1 in REDIS_CONN_STRING with docker host gateway name") SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("127.0.0.1", "host.docker.internal")) # if user didnt specify gitlab credentials, just inject fake ones so we can start up. 
if SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") is None: + logger.verbose(f"Detected no specified gitlab key, using made up values as a workaround") SystemEnv.set("COLLECTOSS_GITLAB_API_KEY", "fake") if SystemEnv.get("COLLECTOSS_GITLAB_USERNAME") is None: + logger.verbose(f"Detected no specified gitlab username, using made up value as a workaround") SystemEnv.set("COLLECTOSS_GITLAB_USERNAME", "fake") # provide a default value for the facade repo directory (assumes docker paths) facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") if facade_repo_directory is None: + logger.verbose(f"Setting default value for COLLECTOSS_FACADE_REPO_DIRECTORY") SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", "/collectoss/facade/") else: # Check if the path is resolveable/make it absolute + logger.verbose(f"Resolving full path to COLLECTOSS_FACADE_REPO_DIRECTORY") SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", str(Path(facade_repo_directory).resolve(strict=True))) # ensure trailing slash is present From f7f7cb7a29291cd892187b5907c4a7f0e4bf15bb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 13:31:06 -0400 Subject: [PATCH 51/77] port over the facade config file handling into python itll start as deprecated since there are already plans to make this better. 
Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 4 +- collectoss/util/startup.py | 51 ++++++++++++++++++++- scripts/docker/config.sh | 64 --------------------------- 3 files changed, 53 insertions(+), 66 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index af3e2825d..f9e83448f 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,7 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables +from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, setup_facade_directory from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -69,6 +69,8 @@ def start(ctx, disable_collection, development, pidfile, port): check_init_schema() check_update_schema() + setup_facade_directory() + try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index ab2eb5cc9..5b2befd3c 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -2,8 +2,13 @@ from pathlib import Path -from collectoss.application.environment import SystemEnv +import os +import getpass +import subprocess +from sqlalchemy.orm.attributes import get_history +from collectoss.application.environment import SystemEnv +from typing_extensions import deprecated def check_init_schema(): """Initialize the CollectOSS database schema as appropriate @@ -84,3 +89,47 @@ def collect_env_variables(logger): if facade_repo_directory and not facade_repo_directory.endswith("/"): facade_repo_directory += "/" SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", facade_repo_directory) + +@deprecated("The bulk of this function is 
handling .git-credentials, which will be replaced with pygit2 (see issue #258)", category=None) +def setup_facade_directory(logger): + """Perform permission checks and create the facade directory if it doesnt exist + """ + + facade_directory_path = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") or "/collectoss/facade/" + + facade_directory = Path(facade_directory_path) + + if not facade_directory.exists(): + logger.verbose(f"Specified facade directory {facade_directory_path} does not exist. Creating...") + facade_directory.mkdir() + + git_credentials = facade_directory.joinpath(".git-credentials") + git_credentials.touch(exist_ok=True) + + if not os.access(git_credentials, os.R_OK): + logger.error(f"User {getpass.getuser()} does not have permission to write to {git_credentials}. Please select another location") + else: + logger.verbose(f"Permission check passed for {git_credentials}") + + + credentials = [] + + gh_names = ["COLLECTOSS_GITHUB_USERNAME","COLLECTOSS_GITHUB_API_KEY"] + gh_values = [SystemEnv.get(n) for n in gh_names] + + if all(map(lambda p: p is not None, gh_values)): + user, key = gh_values + credentials.append(f"https://{user}:{key}@github.com") + + + gl_names = ["COLLECTOSS_GITLAB_USERNAME","COLLECTOSS_GITLAB_API_KEY"] + gl_values = [SystemEnv.get(n) for n in gl_names] + + if all(map(lambda p: p is not None, gl_values)): + user, key = gl_values + credentials.append(f"https://{user}:{key}@gitlab.com") + + with git_credentials.open(encoding="utf-8") as c: + c.writelines(credentials) + + subprocess.call(["git", "config", "--global", "credential.helper", "store", "--file", str(git_credentials)]) \ No newline at end of file diff --git a/scripts/docker/config.sh b/scripts/docker/config.sh index c0e752d02..6d9477fe3 100755 --- a/scripts/docker/config.sh +++ b/scripts/docker/config.sh @@ -50,62 +50,6 @@ function get_github_api_key() { echo } -function get_facade_repo_path() { - - echo "The Facade data collection worker will clone repositories to this 
machine to run its analysis." - echo "Please select a new or existing directory for the Facade worker to use:" - echo - - while true; do - - - # if ! [ -w $facade_repo_directory/.git-credentials ]; then - # echo "User $(whoami) does not have permission to write to that location" - # echo "Please select another location" - # continue - # fi - - # Check if the file exists and create it if it doesn't - if [ ! -f "$facade_repo_directory/.git-credentials" ]; then - echo "File .git-credentials does not exist. Creating it..." - touch "$facade_repo_directory/.git-credentials" - fi - - # Check for write permissions - if ! [ -w "$facade_repo_directory/.git-credentials" ]; then - echo "User $(whoami) does not have permission to write to $facade_repo_directory/.git-credentials" - echo "Please select another location" - continue - else - echo "Permission check passed for $facade_repo_directory/.git-credentials" - fi - - if [[ -d "$facade_repo_directory" ]]; then - read -r -p "That directory already exists. Use it? [Y/n]: " facade_response - case "$facade_response" in - [nN][oO] | [nN]) - continue - ;; - *) - break - ;; - esac - else - read -r -p "That directory does not exist. Create it? [Y/n]: " facade_response - case "$facade_response" in - [nN][oO] | [nN]) - continue - ;; - *) - mkdir "$facade_repo_directory" - echo "Directory created." - break - ;; - esac - fi - done -} - function get_rabbitmq_broker_url() { echo echo "Please provide your rabbitmq broker url." 
@@ -176,14 +120,6 @@ function create_config() { else cmd=( collectoss config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --rabbitmq-conn-string $rabbitmq_conn_string ) fi - - #Create and cache credentials for github and gitlab - touch $facade_repo_directory/.git-credentials - - echo "https://$github_username:$github_api_key@github.com" > $facade_repo_directory/.git-credentials - echo "https://$gitlab_username:$gitlab_api_key@gitlab.com" >> $facade_repo_directory/.git-credentials - - git config --global credential.helper "store --file $facade_repo_directory/.git-credentials" "${cmd[@]}" } echo From ac5a57deadf0d99d183b2cbddaa67ff798b80ad0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 13:59:34 -0400 Subject: [PATCH 52/77] refactor config merging into a separate function so it could be reused from two places in the CLI Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 1 + collectoss/application/cli/config.py | 51 +--------------- collectoss/util/startup.py | 84 ++++++++++++++++++++++++++- 3 files changed, 87 insertions(+), 49 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index f9e83448f..8a691e69f 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -71,6 +71,7 @@ def start(ctx, disable_collection, development, pidfile, port): setup_facade_directory() + merge_config(ctx.obj.engine, logger) try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) diff --git a/collectoss/application/cli/config.py b/collectoss/application/cli/config.py index 50641439e..681c9d201 100644 --- a/collectoss/application/cli/config.py +++ b/collectoss/application/cli/config.py @@ -11,8 +11,7 @@ from collectoss.application.db.session import DatabaseSession from collectoss.application.config import SystemConfig, redact_setting_value from 
collectoss.application.cli import DatabaseContext, test_connection, test_db_connection, with_database -from collectoss.util.inspect_without_import import get_phase_names_without_import -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) +from collectoss.util.startup import merge_config logger = logging.getLogger(__name__) @@ -61,52 +60,8 @@ def init_config(ctx, github_api_key, facade_repo_directory, gitlab_api_key, redi if facade_repo_directory[-1] != "/": facade_repo_directory += "/" - - keys = {} - - keys["github_api_key"] = github_api_key - keys["gitlab_api_key"] = gitlab_api_key - - with DatabaseSession(logger, engine=ctx.obj.engine) as session: - - config = SystemConfig(logger, session) - - augmented_config = config.base_config - - phase_names = get_phase_names_without_import() - - #Add all phases as enabled by default - for name in phase_names: - - if name not in augmented_config['Task_Routine']: - augmented_config['Task_Routine'].update({name : 1}) - - #print(default_config) - if redis_conn_string: - - try: - redis_string_array = redis_conn_string.split("/") - cache_number = int(redis_string_array[-1]) - digits = len(str(cache_number)) - - redis_conn_string = redis_conn_string[:-digits] - - except ValueError: - pass - - augmented_config["Redis"]["connection_string"] = redis_conn_string - - if rabbitmq_conn_string: - augmented_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string - - augmented_config["Keys"] = keys - - augmented_config["Facade"]["repo_directory"] = facade_repo_directory - - augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_PROJECT_REPO_DIRECTORY + "/logs/") - - config.load_config_from_dict(augmented_config) - + merge_config(ctx.obj.engine, logger, github_api_key, facade_repo_directory, gitlab_api_key, redis_conn_string, rabbitmq_conn_string, logs_directory) + @cli.command('load') @click.option('--file', required=True) diff --git 
a/collectoss/util/startup.py b/collectoss/util/startup.py index 5b2befd3c..b9a66f9c9 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -7,9 +7,15 @@ import subprocess from sqlalchemy.orm.attributes import get_history +from collectoss.application.config import SystemConfig +from collectoss.application.db.session import DatabaseSession from collectoss.application.environment import SystemEnv from typing_extensions import deprecated +from collectoss.util.inspect_without_import import get_phase_names_without_import + +ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) + def check_init_schema(): """Initialize the CollectOSS database schema as appropriate """ @@ -132,4 +138,80 @@ def setup_facade_directory(logger): with git_credentials.open(encoding="utf-8") as c: c.writelines(credentials) - subprocess.call(["git", "config", "--global", "credential.helper", "store", "--file", str(git_credentials)]) \ No newline at end of file + subprocess.call(["git", "config", "--global", "credential.helper", "store", "--file", str(git_credentials)]) + + +def merge_config( + engine, + logger, + github_api_key:str | None = None, + facade_repo_directory:str | None = None, + gitlab_api_key:str | None = None, + redis_conn_string:str | None = None, + rabbitmq_conn_string:str | None = None, + logs_directory:str | None = None + ): + """Merge config items provided via environment variables into a place where SystemConfig can pick them up. 
+ + Args: + engine: the database engine to connect to + logger: object to use for outputting logging messages + github_api_key (str): config value + facade_repo_directory (str): config value + gitlab_api_key (str): config value + redis_conn_string (str): config value + rabbitmq_conn_string (str): config value + logs_directory (str): config value + """ + + github_api_key = github_api_key or SystemEnv.get("COLLECTOSS_GITHUB_API_KEY") + facade_repo_directory = github_api_key or SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + gitlab_api_key = github_api_key or SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") + redis_conn_string = github_api_key or SystemEnv.get("REDIS_CONN_STRING") + rabbitmq_conn_string = github_api_key or SystemEnv.get("RABBITMQ_CONN_STRING") + logs_directory = github_api_key or SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") + + keys = {} + + keys["github_api_key"] = github_api_key + keys["gitlab_api_key"] = gitlab_api_key + + with DatabaseSession(logger, engine=engine) as session: + + config = SystemConfig(logger, session) + + augmented_config = config.base_config + + phase_names = get_phase_names_without_import() + + #Add all phases as enabled by default + for name in phase_names: + + if name not in augmented_config['Task_Routine']: + augmented_config['Task_Routine'].update({name : 1}) + + #print(default_config) + if redis_conn_string: + + try: + redis_string_array = redis_conn_string.split("/") + cache_number = int(redis_string_array[-1]) + digits = len(str(cache_number)) + + redis_conn_string = redis_conn_string[:-digits] + + except ValueError: + pass + + augmented_config["Redis"]["connection_string"] = redis_conn_string + + if rabbitmq_conn_string: + augmented_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string + + augmented_config["Keys"] = keys + + augmented_config["Facade"]["repo_directory"] = facade_repo_directory + + augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_PROJECT_REPO_DIRECTORY + "/logs/") + + 
config.load_config_from_dict(augmented_config) From a8280c27565b5e6d4de85ded0260e8659fcb65df Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Jun 2026 14:00:30 -0400 Subject: [PATCH 53/77] config script no longer needed Signed-off-by: Adrian Edwards --- docker/backend/init.sh | 5 -- scripts/docker/config.sh | 132 --------------------------------------- 2 files changed, 137 deletions(-) delete mode 100755 scripts/docker/config.sh diff --git a/docker/backend/init.sh b/docker/backend/init.sh index 65470a403..ce1a11f75 100644 --- a/docker/backend/init.sh +++ b/docker/backend/init.sh @@ -2,11 +2,6 @@ #SPDX-License-Identifier: MIT set -e - -if [ ! -v AUGUR_NO_CONFIG ]; then - ./scripts/docker/config.sh docker -fi - if [[ -f /repo_groups.csv ]]; then collectoss db add-repo-groups /repo_groups.csv fi diff --git a/scripts/docker/config.sh b/scripts/docker/config.sh deleted file mode 100755 index 6d9477fe3..000000000 --- a/scripts/docker/config.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/bin/bash - -PS3=" -Please type the number corresponding to your selection and then press the Enter/Return key. -Your choice: " - -target=$1 - -function blank_confirm() { - if [ -z "${1}" ]; then - echo "Bad usage of blank_confirm at:" - caller - return - fi - - confirm_placeholder=${!1} - - while [ -z "${confirm_placeholder}" ]; do - echo "You entered a blank line, are you sure?" - read -p "enter 'yes' to continue, or enter the intended value: " confirm_placeholder - case "$confirm_placeholder" in - [yY][eE][sS] | [yY][eE] | [yY]) - return - ;; - *) - continue - ;; - esac - done - printf -v "$1" "%s" $confirm_placeholder -} - -function get_github_username() { - echo - echo "Please provide your username for Github." - echo "** This is required for CollectOSS to clone Github repos ***" - read -p "GitHub username: " github_username - blank_confirm github_username - echo -} - -function get_github_api_key() { - echo - echo "Please provide a valid GitHub API key." 
- echo "For more information on how to create the key, visit:" - echo "https://docs.collectoss.org/en/latest/getting-started/collecting-data.html" - echo "** This is required for CollectOSS to gather data ***" - read -p "GitHub API Key: " github_api_key - blank_confirm github_api_key - echo -} - -function get_rabbitmq_broker_url() { - echo - echo "Please provide your rabbitmq broker url." - echo "** This is required for CollectOSS to run all collection tasks. ***" - read -p "broker_url: " rabbitmq_conn_string - blank_confirm rabbitmq_conn_string - echo -} - -function create_config() { - - if [[ -z "${AUGUR_GITHUB_API_KEY}" ]]; then - get_github_api_key - else - echo - echo "Found AUGUR_GITHUB_API_KEY environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITHUB_API_KEY if you would like to be prompted for a github api key" - github_api_key=$AUGUR_GITHUB_API_KEY - echo - fi - - if [[ -z "${AUGUR_GITHUB_USERNAME}" ]]; then - get_github_username - else - echo - echo "Found AUGUR_GITHUB_USERNAME environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITHUB_USERNAME if you would like to be prompted for a github username" - github_username=$AUGUR_GITHUB_USERNAME - echo - fi - - if [[ -z "${AUGUR_FACADE_REPO_DIRECTORY}" ]]; then - get_facade_repo_path - else - echo - echo "Found AUGUR_FACADE_REPO_DIRECTORY environment variable with value $AUGUR_FACADE_REPO_DIRECTORY" - echo "Using it in the config" - echo "IMPORTANT NOTE: This assumes that this directory already exists" - echo "Please unset AUGUR_FACADE_REPO_DIRECTORY if you would like to be prompted for the facade repo directory" - facade_repo_directory=$AUGUR_FACADE_REPO_DIRECTORY - echo - fi - - if [[ -z "${RABBITMQ_CONN_STRING}" ]]; then - get_rabbitmq_broker_url - else - echo - echo "Found RABBITMQ_CONN_STRING environment variable with value $RABBITMQ_CONN_STRING" - echo "Using it in the config" - echo "Please unset RABBITMQ_CONN_STRING if you would like to be 
prompted for the rabbit MQ connection string" - rabbitmq_conn_string=$RABBITMQ_CONN_STRING - echo - fi - - # echo $rabbitmq_conn_string - # echo $facade_repo_directory - # echo $gitlab_username - # echo $gitlab_api_key - # echo $github_username - # echo $github_api_key - - #special case for docker entrypoint - if [ $target = "docker" ]; then - cmd=( collectoss config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --redis-conn-string $redis_conn_string --rabbitmq-conn-string $rabbitmq_conn_string --logs-directory /logs) - echo "init with redis $redis_conn_string" - else - cmd=( collectoss config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --rabbitmq-conn-string $rabbitmq_conn_string ) - fi - "${cmd[@]}" -} -echo -echo "Collecting data for config..." -create_config -echo -echo "Config created" -echo - -# config_prompt From 2bf5f8baf23f6ae5e2a4aa2bdacdd868981662f3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 5 Jun 2026 09:30:57 -0400 Subject: [PATCH 54/77] enable the basic schema update subshell for now Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index b9a66f9c9..3a4820483 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -5,6 +5,7 @@ import os import getpass import subprocess +from subprocess import check_call from sqlalchemy.orm.attributes import get_history from collectoss.application.config import SystemConfig @@ -31,7 +32,7 @@ def check_update_schema(): """ pass # alembic upgrade head, unless theres an env var preventing automatic migration - # check_call(["alembic", "upgrade", "head"]) + check_call(["alembic", "upgrade", "head"]) def collect_env_variables(logger): """convenience helper for assembling more complex environment variables out of 
smaller ones From d6175a881d20085d967cfc851e2e01c9a14922d3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 5 Jun 2026 11:31:32 -0400 Subject: [PATCH 55/77] add a platform information function Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 5 ++++- collectoss/util/startup.py | 9 +++++++++ docker/backend/init.sh | 5 ----- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 8a691e69f..e1c38c8a2 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,7 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, setup_facade_directory +from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, print_platform_information, setup_facade_directory from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -72,6 +72,9 @@ def start(ctx, disable_collection, development, pidfile, port): setup_facade_directory() merge_config(ctx.obj.engine, logger) + + print_platform_information() + try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 3a4820483..74edad2d2 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -6,6 +6,8 @@ import getpass import subprocess from subprocess import check_call +import platform +import sys from sqlalchemy.orm.attributes import get_history from collectoss.application.config import SystemConfig @@ -216,3 +218,10 @@ def merge_config( augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_PROJECT_REPO_DIRECTORY + "/logs/") config.load_config_from_dict(augmented_config) 
+ + +def print_platform_information(): + + print(f"PATH: {os.environ.get('PATH')}") + print(f"Python executable (current): {sys.executable}") + print(f"Python version: {platform.python_version()}") \ No newline at end of file diff --git a/docker/backend/init.sh b/docker/backend/init.sh index ce1a11f75..51d3abddd 100644 --- a/docker/backend/init.sh +++ b/docker/backend/init.sh @@ -9,9 +9,4 @@ fi if [[ -f /repos.csv ]]; then collectoss db add-repos /repos.csv fi - -echo "PATH: $PATH" -echo "Python executable: $(which python)" -python --version - exec collectoss backend start --pidfile /tmp/main.pid From ad04c9062bcb71371b0e3ddc7502f96479669e88 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 8 Jun 2026 16:16:46 -0400 Subject: [PATCH 56/77] include new environment file in linted files list Signed-off-by: Adrian Edwards --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6a99aacf5..5671e21de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,7 +163,10 @@ markers = [ [tool.mypy] -files = ['collectoss/application/db/*.py'] +files = [ + 'collectoss/application/db/*.py', + 'collectoss/application/environment.py', +] ignore_missing_imports = true follow_imports = "skip" disallow_untyped_defs = false From b10041529374bbc44c2d6fec7cde9b347723b381 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 8 Jun 2026 16:59:27 -0400 Subject: [PATCH 57/77] add function to detect and warn if files are present at the previous autoimport locations Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 4 +++- collectoss/util/startup.py | 18 ++++++++++++++++++ docker/backend/init.sh | 7 ------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index e1c38c8a2..c2dffd4cf 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,7 @@ from 
collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, print_platform_information, setup_facade_directory +from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, print_platform_information, setup_facade_directory, warn_import_repos from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -73,6 +73,8 @@ def start(ctx, disable_collection, development, pidfile, port): merge_config(ctx.obj.engine, logger) + warn_import_repos() + print_platform_information() try: diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 74edad2d2..c7aa74c60 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -220,6 +220,24 @@ def merge_config( config.load_config_from_dict(augmented_config) +@deprecated("automatic import is deprecated. This is a function to warn users and help them transition") +def warn_import_repos(logger): + """We are choosing not to auto import repos and repo groups automatically + This function detects attempts to use the automatic feature and warns users to use the CLI themselves + + Args: + logger: the logger to use + """ + + if Path("/repo_groups.csv").exists(): + logger.warning("Detected /repo_groups.csv file at startup. Automatic import of repo groups is deprecated.") + logger.warning("To import repo groups from a CSV, use the CLI: collectoss db add-repo-groups /repo_groups.csv") + + if Path("/repos.csv").exists(): + logger.warning("Detected /repos.csv file at startup. 
Automatic import of repos is deprecated.") + logger.warning("To import repos from a CSV, use the CLI: collectoss db add-repos /repos.csv") + + def print_platform_information(): print(f"PATH: {os.environ.get('PATH')}") diff --git a/docker/backend/init.sh b/docker/backend/init.sh index 51d3abddd..ee624c63f 100644 --- a/docker/backend/init.sh +++ b/docker/backend/init.sh @@ -2,11 +2,4 @@ #SPDX-License-Identifier: MIT set -e -if [[ -f /repo_groups.csv ]]; then - collectoss db add-repo-groups /repo_groups.csv -fi - -if [[ -f /repos.csv ]]; then - collectoss db add-repos /repos.csv -fi exec collectoss backend start --pidfile /tmp/main.pid From fb2930eadebc3f9c76f1d9ff6862328ee1605f97 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 8 Jun 2026 17:02:33 -0400 Subject: [PATCH 58/77] move remaining command in init into the docker init directive Signed-off-by: Adrian Edwards --- docker/backend/Dockerfile | 2 +- docker/backend/init.sh | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) delete mode 100644 docker/backend/init.sh diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index e45559b66..63a81effb 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -117,4 +117,4 @@ RUN ln -s /cache /collectoss/collectoss/static/cache COPY --chmod=755 ./docker/backend/entrypoint.sh / COPY --chmod=755 ./docker/backend/init.sh / RUN chmod +x /entrypoint.sh /init.sh -CMD ["/init.sh"] +CMD ["collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] diff --git a/docker/backend/init.sh b/docker/backend/init.sh deleted file mode 100644 index ee624c63f..000000000 --- a/docker/backend/init.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -#SPDX-License-Identifier: MIT -set -e - -exec collectoss backend start --pidfile /tmp/main.pid From 62dcbc72ccea01a24357ac135a1c616d73c0c29a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 8 Jun 2026 17:07:38 -0400 Subject: [PATCH 59/77] remove old dockerfile entrypoint and init script infrastructure 
Signed-off-by: Adrian Edwards --- docker/backend/Dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 63a81effb..65b2ccdbb 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -113,8 +113,4 @@ RUN ${SCORECARD_DIR}/scorecard version RUN mkdir -p repos/ logs/ /collectoss/facade/ RUN ln -s /cache /collectoss/collectoss/static/cache -# Copy in the entrypoint and init scripts, ensuring they are executable -COPY --chmod=755 ./docker/backend/entrypoint.sh / -COPY --chmod=755 ./docker/backend/init.sh / -RUN chmod +x /entrypoint.sh /init.sh CMD ["collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] From eab204c708da4574bda982869551907ff15342a9 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 8 Jun 2026 17:13:18 -0400 Subject: [PATCH 60/77] revert keyman orchestrator back to regular environment variables so it doesn't import CollectOSS Signed-off-by: Adrian Edwards --- keyman/Orchestrator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/keyman/Orchestrator.py b/keyman/Orchestrator.py index d93a1f064..71cfae8bb 100644 --- a/keyman/Orchestrator.py +++ b/keyman/Orchestrator.py @@ -4,16 +4,15 @@ import time from keyman.KeyOrchestrationAPI import spec, WaitKeyTimeout, InvalidRequest -from collectoss.application.environment import SystemEnv -if SystemEnv.get("KEYMAN_DOCKER"): +if os.environ.get("KEYMAN_DOCKER"): import sys import redis import logging sys.path.append("/collectoss") - conn = redis.Redis.from_url(SystemEnv.get("REDIS_CONN_STRING")) + conn = redis.Redis.from_url(os.environ.get("REDIS_CONN_STRING")) # Just log to stdout if we're running in docker logger = logging.Logger("KeyOrchestrator") From 6934d7c84523e837d01948a4bdd3d3940e124546 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 8 Jun 2026 17:34:56 -0400 Subject: [PATCH 61/77] logger.verbose -> logger.debug Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 22 
+++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index c7aa74c60..392df2df3 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -47,7 +47,7 @@ def collect_env_variables(logger): if all(map(lambda p: p is not None, values)): host, user, passwd, name = values - logger.verbose(f"Assembling COLLECTOSS_DB string from provided variables") + logger.debug(f"Assembling COLLECTOSS_DB string from provided variables") SystemEnv.set("COLLECTOSS_DB", f"postgresql+psycopg2://{user}:{passwd}@{host}/{name}") else: logger.warning("CollectOSS was unable to create your database connection string automatically") @@ -60,37 +60,37 @@ def collect_env_variables(logger): db_string = SystemEnv.get("COLLECTOSS_DB") if db_string and "localhost" in db_string: - logger.verbose(f"Swapping localhost in COLLECTOSS_DB string with docker host gateway name") + logger.debug(f"Swapping localhost in COLLECTOSS_DB string with docker host gateway name") SystemEnv.set("COLLECTOSS_DB", db_string.replace("localhost", "host.docker.internal")) elif db_string and "127.0.0.1" in db_string: - logger.verbose(f"Swapping 127.0.0.1 in COLLECTOSS_DB string with docker host gateway name") + logger.debug(f"Swapping 127.0.0.1 in COLLECTOSS_DB string with docker host gateway name") SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) redis_string = SystemEnv.get("REDIS_CONN_STRING") if redis_string and "localhost" in redis_string: - logger.verbose(f"Swapping localhost in REDIS_CONN_STRING with docker host gateway name") + logger.debug(f"Swapping localhost in REDIS_CONN_STRING with docker host gateway name") SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("localhost", "host.docker.internal")) elif redis_string and "127.0.0.1" in redis_string: - logger.verbose(f"Swapping 127.0.0.1 in REDIS_CONN_STRING with docker host gateway name") + logger.debug(f"Swapping 127.0.0.1 
in REDIS_CONN_STRING with docker host gateway name") SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("127.0.0.1", "host.docker.internal")) # if user didnt specify gitlab credentials, just inject fake ones so we can start up. if SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") is None: - logger.verbose(f"Detected no specified gitlab key, using made up values as a workaround") + logger.debug(f"Detected no specified gitlab key, using made up values as a workaround") SystemEnv.set("COLLECTOSS_GITLAB_API_KEY", "fake") if SystemEnv.get("COLLECTOSS_GITLAB_USERNAME") is None: - logger.verbose(f"Detected no specified gitlab username, using made up value as a workaround") + logger.debug(f"Detected no specified gitlab username, using made up value as a workaround") SystemEnv.set("COLLECTOSS_GITLAB_USERNAME", "fake") # provide a default value for the facade repo directory (assumes docker paths) facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") if facade_repo_directory is None: - logger.verbose(f"Setting default value for COLLECTOSS_FACADE_REPO_DIRECTORY") + logger.debug(f"Setting default value for COLLECTOSS_FACADE_REPO_DIRECTORY") SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", "/collectoss/facade/") else: # Check if the path is resolveable/make it absolute - logger.verbose(f"Resolving full path to COLLECTOSS_FACADE_REPO_DIRECTORY") + logger.debug(f"Resolving full path to COLLECTOSS_FACADE_REPO_DIRECTORY") SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", str(Path(facade_repo_directory).resolve(strict=True))) # ensure trailing slash is present @@ -109,7 +109,7 @@ def setup_facade_directory(logger): facade_directory = Path(facade_directory_path) if not facade_directory.exists(): - logger.verbose(f"Specified facade directory {facade_directory_path} does not exist. Creating...") + logger.debug(f"Specified facade directory {facade_directory_path} does not exist. 
Creating...") facade_directory.mkdir() git_credentials = facade_directory.joinpath(".git-credentials") @@ -118,7 +118,7 @@ def setup_facade_directory(logger): if not os.access(git_credentials, os.R_OK): logger.error(f"User {getpass.getuser()} does not have permission to write to {git_credentials}. Please select another location") else: - logger.verbose(f"Permission check passed for {git_credentials}") + logger.debug(f"Permission check passed for {git_credentials}") credentials = [] From d1b49a87161ed8217be0b8559c66d9253d89ba12 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 09:51:12 -0400 Subject: [PATCH 62/77] ensure logger is correctly passed and all functions are imported Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index c2dffd4cf..40ae406aa 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,7 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, print_platform_information, setup_facade_directory, warn_import_repos +from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, print_platform_information, setup_facade_directory, warn_import_repos, merge_config from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) @@ -69,11 +69,11 @@ def start(ctx, disable_collection, development, pidfile, port): check_init_schema() check_update_schema() - setup_facade_directory() + setup_facade_directory(logger) merge_config(ctx.obj.engine, logger) - warn_import_repos() + warn_import_repos(logger) print_platform_information() From 
8722da4579604837233c2893e497e559e5f0fbe9 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 09:51:21 -0400 Subject: [PATCH 63/77] use logger to output version information Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 2 +- collectoss/util/startup.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 40ae406aa..173bc41a7 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -75,7 +75,7 @@ def start(ctx, disable_collection, development, pidfile, port): warn_import_repos(logger) - print_platform_information() + print_platform_information(logger) try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 392df2df3..0d58bf253 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -238,8 +238,7 @@ def warn_import_repos(logger): logger.warning("To import repos from a CSV, use the CLI: collectoss db add-repos /repos.csv") -def print_platform_information(): - - print(f"PATH: {os.environ.get('PATH')}") - print(f"Python executable (current): {sys.executable}") - print(f"Python version: {platform.python_version()}") \ No newline at end of file +def print_platform_information(logger): + logger.info(f"PATH: {os.environ.get('PATH')}") + logger.info(f"Python executable (current): {sys.executable}") + logger.info(f"Python version: {platform.python_version()}") \ No newline at end of file From 3ce2b1efb9a2b75769bce733f87aeda841bff4a5 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 10:47:57 -0400 Subject: [PATCH 64/77] open facade directory file in write mode Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 0d58bf253..8921df20d 100644 --- 
a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -138,7 +138,7 @@ def setup_facade_directory(logger): user, key = gl_values credentials.append(f"https://{user}:{key}@gitlab.com") - with git_credentials.open(encoding="utf-8") as c: + with git_credentials.open("w", encoding="utf-8") as c: c.writelines(credentials) subprocess.call(["git", "config", "--global", "credential.helper", "store", "--file", str(git_credentials)]) From 3f7a8bec43128d5a31da0de5bd5b6405e5d01b97 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 11:35:24 -0400 Subject: [PATCH 65/77] fix config bug with new startup process github keys being put in wrong places Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py index 8921df20d..200691a5f 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -168,11 +168,11 @@ def merge_config( """ github_api_key = github_api_key or SystemEnv.get("COLLECTOSS_GITHUB_API_KEY") - facade_repo_directory = github_api_key or SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") - gitlab_api_key = github_api_key or SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") - redis_conn_string = github_api_key or SystemEnv.get("REDIS_CONN_STRING") - rabbitmq_conn_string = github_api_key or SystemEnv.get("RABBITMQ_CONN_STRING") - logs_directory = github_api_key or SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") + facade_repo_directory = facade_repo_directory or SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + gitlab_api_key = gitlab_api_key or SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") + redis_conn_string = redis_conn_string or SystemEnv.get("REDIS_CONN_STRING") + rabbitmq_conn_string = rabbitmq_conn_string or SystemEnv.get("RABBITMQ_CONN_STRING") + logs_directory = logs_directory or SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") keys = {} From 9618ab13ebe703af40b931a95d8fe555d4d895b3 Mon Sep 17 00:00:00 
2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 14:18:57 -0400 Subject: [PATCH 66/77] improve robustness of logger against missing log directories in config Signed-off-by: Adrian Edwards --- collectoss/application/logs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collectoss/application/logs.py b/collectoss/application/logs.py index 253482877..aaf6cb5d8 100644 --- a/collectoss/application/logs.py +++ b/collectoss/application/logs.py @@ -192,8 +192,8 @@ def __init__(self, logger_name, disable_log_files=False,reset_logfiles=False,bas log_config = get_log_config() - if log_config["logs_directory"] != "": - base_log_dir=log_config["logs_directory"] + if log_config.get("logs_directory", "") != "": + base_log_dir=log_config.get("logs_directory") if reset_logfiles is True: try: From b1f52dc67786267dfc7b9f2406ead18720ec5098 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 17:55:41 -0400 Subject: [PATCH 67/77] include logs directory environment variable in default compose Signed-off-by: Adrian Edwards --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 5673fd5e9..82268e7e5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -59,6 +59,7 @@ services: - "COLLECTOSS_DB=postgresql+psycopg2://${COLLECTOSS_DB_USER:-augur}:${COLLECTOSS_DB_PASSWORD:-augur}@database:5432/augur" - "COLLECTOSS_DB_SCHEMA_BUILD=1" - COLLECTOSS_FACADE_REPO_DIRECTORY=/facade + - COLLECTOSS_LOGS_DIRECTORY=/logs - "COLLECTOSS_FLAGS=$COLLECTOSS_FLAGS" - "COLLECTOSS_GITHUB_API_KEY=${COLLECTOSS_GITHUB_API_KEY}" - "COLLECTOSS_GITLAB_API_KEY=${COLLECTOSS_GITLAB_API_KEY}" From 4dff338f29fc0bc1002a5a99d9b89ce61652057f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 9 Jun 2026 18:32:15 -0400 Subject: [PATCH 68/77] correct translation error in git config command Signed-off-by: Adrian Edwards --- collectoss/util/startup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/collectoss/util/startup.py b/collectoss/util/startup.py index 200691a5f..8fe5b2374 100644 --- a/collectoss/util/startup.py +++ b/collectoss/util/startup.py @@ -141,7 +141,7 @@ def setup_facade_directory(logger): with git_credentials.open("w", encoding="utf-8") as c: c.writelines(credentials) - subprocess.call(["git", "config", "--global", "credential.helper", "store", "--file", str(git_credentials)]) + subprocess.call(["git", "config", "--global", "credential.helper", f"store --file {str(git_credentials)}"]) def merge_config( From c04981112980e0921ede15572c3bc42065eb04af Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 11:52:30 -0400 Subject: [PATCH 69/77] lengthen timeout for docker job so that things actually can run to potential completion Temporary workaround until we have a faster DB initialization Signed-off-by: Adrian Edwards --- .github/workflows/build_docker.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index ec40bc11d..8e0da793d 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -178,7 +178,7 @@ jobs: docker compose -f docker-compose.yml up --no-build 2>&1 \ | (./scripts/ci/await_all.py /tmp/regex_matches.txt \ && docker compose -f docker-compose.yml down) - timeout-minutes: 3 + timeout-minutes: 8 env: AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} AUGUR_GITHUB_USERNAME: ${{ github.repository_owner }} @@ -294,7 +294,7 @@ jobs: podman compose -f docker-compose.yml up --no-build 2>&1 \ | (./scripts/ci/await_all.py /tmp/regex_matches.txt \ && podman compose -f docker-compose.yml down) - timeout-minutes: 3 + timeout-minutes: 8 env: AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} AUGUR_GITHUB_USERNAME: ${{ github.repository_owner }} From 97820fe1cbd4fc54b9ca59da3829fd5f4622f6c6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 12:52:42 -0400 Subject: [PATCH 70/77] move startup 
process to a python based preflight script this should hopefully take care of everything up front like the original scripts did, without creating issues related to the cursed import situation Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 15 --------------- docker/backend/Dockerfile | 4 +++- docker/backend/preflight.py | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 16 deletions(-) create mode 100755 docker/backend/preflight.py diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 173bc41a7..f71059d1b 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -62,21 +62,6 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGTERM, manager.shutdown_signal_handler) signal.signal(signal.SIGINT, manager.shutdown_signal_handler) - - collect_env_variables(logger) - - - check_init_schema() - check_update_schema() - - setup_facade_directory(logger) - - merge_config(ctx.obj.engine, logger) - - warn_import_repos(logger) - - print_platform_information(logger) - try: if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 65b2ccdbb..9b333b18b 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -113,4 +113,6 @@ RUN ${SCORECARD_DIR}/scorecard version RUN mkdir -p repos/ logs/ /collectoss/facade/ RUN ln -s /cache /collectoss/collectoss/static/cache -CMD ["collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] +COPY --chmod=755 ./docker/backend/preflight.py /preflight.py +RUN chmod +x /preflight.py +CMD ["python3", "/preflight.py", "&", "collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] diff --git a/docker/backend/preflight.py b/docker/backend/preflight.py new file mode 100755 index 000000000..a6a2ccb47 --- /dev/null +++ b/docker/backend/preflight.py @@ -0,0 +1,19 @@ +from 
collectoss.util.startup import collect_env_variables, check_init_schema, check_update_schema, setup_facade_directory, merge_config, warn_import_repos, print_platform_information +from collectoss.application.logs import SystemLogger + + +if __name__ == "__main__": + logger = SystemLogger("backend", reset_logfiles=False).get_logger() + + collect_env_variables(logger) + + check_init_schema() + check_update_schema() + + setup_facade_directory(logger) + + merge_config(ctx.obj.engine, logger) + + warn_import_repos(logger) + + print_platform_information(logger) From c804aa0ac4437e8575ef81a643130dedd96b7639 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 14:29:42 -0400 Subject: [PATCH 71/77] pass in correct DB context Signed-off-by: Adrian Edwards --- docker/backend/preflight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/backend/preflight.py b/docker/backend/preflight.py index a6a2ccb47..69de26e42 100755 --- a/docker/backend/preflight.py +++ b/docker/backend/preflight.py @@ -1,6 +1,6 @@ from collectoss.util.startup import collect_env_variables, check_init_schema, check_update_schema, setup_facade_directory, merge_config, warn_import_repos, print_platform_information from collectoss.application.logs import SystemLogger - +from collectoss.application.cli import DatabaseContext if __name__ == "__main__": logger = SystemLogger("backend", reset_logfiles=False).get_logger() @@ -12,7 +12,7 @@ setup_facade_directory(logger) - merge_config(ctx.obj.engine, logger) + merge_config(DatabaseContext().engine, logger) warn_import_repos(logger) From e58b343bda392db47d7a95ccd31754067266ff30 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 14:29:52 -0400 Subject: [PATCH 72/77] exit successfully so the backend can start Signed-off-by: Adrian Edwards --- docker/backend/preflight.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/backend/preflight.py b/docker/backend/preflight.py index 69de26e42..d5271a3e9 
100755 --- a/docker/backend/preflight.py +++ b/docker/backend/preflight.py @@ -1,6 +1,7 @@ from collectoss.util.startup import collect_env_variables, check_init_schema, check_update_schema, setup_facade_directory, merge_config, warn_import_repos, print_platform_information from collectoss.application.logs import SystemLogger from collectoss.application.cli import DatabaseContext +import sys if __name__ == "__main__": logger = SystemLogger("backend", reset_logfiles=False).get_logger() @@ -17,3 +18,5 @@ warn_import_repos(logger) print_platform_information(logger) + + sys.exit(0) From 666605fc56eeaa5150110eb92d33a9bf96b3901f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 14:30:08 -0400 Subject: [PATCH 73/77] && only works with the shell form of CMD Signed-off-by: Adrian Edwards --- docker/backend/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 9b333b18b..344756a06 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -115,4 +115,5 @@ RUN ln -s /cache /collectoss/collectoss/static/cache COPY --chmod=755 ./docker/backend/preflight.py /preflight.py RUN chmod +x /preflight.py -CMD ["python3", "/preflight.py", "&", "collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] +CMD python3 /preflight.py && collectoss backend start --pidfile /tmp/main.pid +# CMD ["collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] From 5a69838446ed9db77cc5a675d87d0c5b1371d747 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 15:00:15 -0400 Subject: [PATCH 74/77] bypass SystemLogger, which requires a database connection Signed-off-by: Adrian Edwards --- docker/backend/preflight.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docker/backend/preflight.py b/docker/backend/preflight.py index d5271a3e9..4207db8b7 100755 --- a/docker/backend/preflight.py +++ b/docker/backend/preflight.py @@ -1,10 
+1,23 @@ from collectoss.util.startup import collect_env_variables, check_init_schema, check_update_schema, setup_facade_directory, merge_config, warn_import_repos, print_platform_information -from collectoss.application.logs import SystemLogger +from collectoss.application.logs import getFormatter from collectoss.application.cli import DatabaseContext import sys +import logging if __name__ == "__main__": - logger = SystemLogger("backend", reset_logfiles=False).get_logger() + # We cannot use systemLogger here because it depends on the database + # At this point in execution, the database may not yet be initialized or + # usable for configuration. So for now we DIY it as a temporary measure + # until we can more comprehensively improve the high level configuration system + logger = logging.getLogger("collectoss.preflight") + log_level = logging.INFO + if not logger.handlers: + handler = logging.StreamHandler() + handler.setLevel(log_level) + handler.setFormatter(getFormatter(log_level)) + logger.addHandler(handler) + logger.setLevel(log_level) + logger.propagate = False collect_env_variables(logger) From 51d1f32e21411d93e8e3788c53fe0bfee05d448c Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 15:28:56 -0400 Subject: [PATCH 75/77] update env vars used in CI so we are passing correct values to the application Signed-off-by: Adrian Edwards --- .github/workflows/build_docker.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 8e0da793d..5cd90b0ba 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -180,10 +180,10 @@ jobs: && docker compose -f docker-compose.yml down) timeout-minutes: 8 env: - AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} - AUGUR_GITHUB_USERNAME: ${{ github.repository_owner }} - AUGUR_GITLAB_API_KEY: dummy - AUGUR_GITLAB_USERNAME: dummy + COLLECTOSS_GITHUB_API_KEY: ${{ 
secrets.GITHUB_TOKEN }} + COLLECTOSS_GITHUB_USERNAME: ${{ github.repository_owner }} + COLLECTOSS_GITLAB_API_KEY: dummy + COLLECTOSS_GITLAB_USERNAME: dummy - name: Dump logs # Always run this step to get logs, even if the previous step fails @@ -296,16 +296,17 @@ jobs: && podman compose -f docker-compose.yml down) timeout-minutes: 8 env: - AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} - AUGUR_GITHUB_USERNAME: ${{ github.repository_owner }} - AUGUR_GITLAB_API_KEY: dummy - AUGUR_GITLAB_USERNAME: dummy + COLLECTOSS_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} + COLLECTOSS_GITHUB_USERNAME: ${{ github.repository_owner }} + COLLECTOSS_GITLAB_API_KEY: dummy + COLLECTOSS_GITLAB_USERNAME: dummy - name: Dump logs # Always run this step to get logs, even if the previous step fails if: always() # We use tail so that we can see the name of each file as it's printed - run: "podman run -t --rm -v augur_logs:/logs bash -c 'find /logs -type f | xargs + run: + "podman run -t --rm -v augur_logs:/logs bash -c 'find /logs -type f | xargs tail -n +0'" push-image: From bba22afc416da91852bca714a0cf438c90c5b487 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 19 Jan 2026 14:37:36 -0500 Subject: [PATCH 76/77] add health check to postgres to ensure it is done with its thing before CollectOSS starts. 
This avoids some race conditions with the startup process that could create issues, especially on first initialization and especially in CI where we are still replaying migrations Co-Authored-By: guptapratykshh Co-Authored-By: Sukuna0007Abhi Signed-off-by: Adrian Edwards --- docker-compose.yml | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 82268e7e5..1b455a352 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,15 @@ services: - "${COLLECTOSS_DB_PORT:-5432}:5432" volumes: - augurpostgres:/var/lib/postgresql/data + healthcheck: + test: + [ + "CMD-SHELL", + "pg_isready -U ${COLLECTOSS_DB_USER:-augur} -d ${COLLECTOSS_DB_NAME:-augur}", + ] + interval: 10s + timeout: 5s + retries: 5 redis: image: "redis:alpine" @@ -74,10 +83,14 @@ services: - CACHE_LOCKDIR=/cache - CELERYBEAT_SCHEDULE_DB=/tmp/celerybeat-schedule.db depends_on: - - database - - redis - - keyman - - rabbitmq + database: + condition: service_healthy + redis: + condition: service_started + keyman: + condition: service_started + rabbitmq: + condition: service_started user: 2345:2345 # Run as an arbitrary non-root user post_start: # Make sure the user has access to the volumes From f8e2d4f6ba098dc1a2b2b6d04a2256f3f2332322 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 10 Jun 2026 18:03:18 -0400 Subject: [PATCH 77/77] remove unused imports Signed-off-by: Adrian Edwards --- collectoss/application/cli/backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index f71059d1b..3526a3c2c 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -30,7 +30,6 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s -from collectoss.util.startup import check_init_schema, check_update_schema, 
collect_env_variables, print_platform_information, setup_facade_directory, warn_import_repos, merge_config from keyman.KeyClient import KeyClient, KeyPublisher reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True)