diff --git a/collectoss/api/gunicorn_conf.py b/collectoss/api/gunicorn_conf.py index 22c11231a..ee7797471 100644 --- a/collectoss/api/gunicorn_conf.py +++ b/collectoss/api/gunicorn_conf.py @@ -7,6 +7,7 @@ from collectoss.application.db.lib import get_value from collectoss.application.db import dispose_database_engine +from collectoss.application.environment import SystemEnv logger = logging.getLogger(__name__) @@ -20,8 +21,8 @@ workers = multiprocessing.cpu_count() * 2 + 1 umask = 0o007 reload = True - -is_dev = os.getenv("AUGUR_DEV", 'False').lower() in ('true', '1', 't', 'y', 'yes') +# this satisfies the type checker +is_dev = SystemEnv.get_bool("AUGUR_DEV", False) if is_dev: @@ -40,7 +41,8 @@ # set the log location for gunicorn logs_directory = get_value('Logging', 'logs_directory') -is_docker = os.getenv("AUGUR_DOCKER_DEPLOY", 'False').lower() in ('true', '1', 't', 'y', 'yes') +# this syntax satisfies the type checker +is_docker = SystemEnv.get_bool("AUGUR_DOCKER_DEPLOY", False) accesslog = f"{logs_directory}/gunicorn.log" errorlog = f"{logs_directory}/gunicorn.log" diff --git a/collectoss/api/routes/auggie.py b/collectoss/api/routes/auggie.py index 18642498f..4cde77084 100644 --- a/collectoss/api/routes/auggie.py +++ b/collectoss/api/routes/auggie.py @@ -14,6 +14,8 @@ import requests import slack +from collectoss.application.environment import SystemEnv + from ..server import app @@ -252,7 +254,7 @@ def get_auggie_user(): # return Response(response=response, status=200, mimetype="application/json") ## From Method profile_name = 'collectoss' - if os.environ.get('AUGUR_IS_PROD'): + if SystemEnv.get('COLLECTOSS_IS_PROD'): profile_name = 'default' client = boto3.Session(region_name='us-east-1', profile_name=profile_name).client('dynamodb') response = client.get_item( @@ -278,7 +280,7 @@ def update_auggie_user_tracking(): # return Response(response=response, status=200, mimetype="application/json") ## From Method profile_name = 'collectoss' - if 
os.environ.get('AUGUR_IS_PROD'): + if SystemEnv.get('COLLECTOSS_IS_PROD'): profile_name = 'default' client = boto3.Session(region_name='us-east-1', profile_name=profile_name).client('dynamodb') response = client.update_item( @@ -326,7 +328,7 @@ def slack_login(): print("slack_login") r = requests.get( - url=f'https://slack.com/api/oauth.v2.access?code={body["code"]}&client_id={os.environ["AUGGIE_CLIENT_ID"]}&client_secret={os.environ["AUGGIE_CLIENT_SECRET"]}&redirect_uri=http%3A%2F%2Flocalhost%3A8080') + url=f'https://slack.com/api/oauth.v2.access?code={body["code"]}&client_id={SystemEnv.get("AUGGIE_CLIENT_ID")}&client_secret={SystemEnv.get("AUGGIE_CLIENT_SECRET")}&redirect_uri=http%3A%2F%2Flocalhost%3A8080') data = r.json() if (data["ok"]): @@ -340,7 +342,7 @@ def slack_login(): email = user_response["user"]["email"] profile_name = 'collectoss' - if os.environ.get('AUGUR_IS_PROD'): + if SystemEnv.get('COLLECTOSS_IS_PROD'): profile_name = 'default' print("Making Boto3 Session") client = boto3.Session(region_name='us-east-1', diff --git a/collectoss/api/server.py b/collectoss/api/server.py index a4d212f58..7955cd7a1 100644 --- a/collectoss/api/server.py +++ b/collectoss/api/server.py @@ -27,6 +27,7 @@ from collectoss.application.logs import SystemLogger from collectoss.application.db.session import DatabaseSession from collectoss.application.config import SystemConfig +from collectoss.application.environment import SystemEnv from collectoss.application.db.engine import get_database_string, create_database_engine from collectoss.application.db.models import Repo, Issue, PullRequest, Message, PullRequestReview, Commit, IssueAssignee, PullRequestAssignee, PullRequestCommit, PullRequestFile, Contributor, IssueLabel, PullRequestLabel, ContributorsAlias, Release, ClientApplication @@ -300,8 +301,8 @@ def create_cache_manager() -> CacheManager: cache_config = { 'cache.type': 'file', # Allow setting cache directories via environment variables - 'cache.data_dir': 
Path(env.setdefault("CACHE_DATADIR", 'runtime/cache/')), - 'cache.lock_dir': Path(env.setdefault("CACHE_LOCKDIR", 'runtime/cache/')), + 'cache.data_dir': Path(SystemEnv.set_default("CACHE_DATADIR", 'runtime/cache/')), + 'cache.lock_dir': Path(SystemEnv.set_default("CACHE_LOCKDIR", 'runtime/cache/')), } if not os.path.exists(cache_config['cache.data_dir']): diff --git a/collectoss/api/view/init.py b/collectoss/api/view/init.py index ab4708793..b26752af9 100644 --- a/collectoss/api/view/init.py +++ b/collectoss/api/view/init.py @@ -1,13 +1,10 @@ -import os from pathlib import Path -from .server import Environment from collectoss.application.logs import SystemLogger import secrets, yaml - -env = Environment() +from collectoss.application.environment import SystemEnv # load configuration files and initialize globals -configFile = Path(env.setdefault("CONFIG_LOCATION", "config.yml")) +configFile = Path(SystemEnv.get("CONFIG_LOCATION") or "config.yml") settings = {} diff --git a/collectoss/api/view/server/Environment.py b/collectoss/api/view/server/Environment.py deleted file mode 100644 index 76b8207ca..000000000 --- a/collectoss/api/view/server/Environment.py +++ /dev/null @@ -1,52 +0,0 @@ -import os - -class Environment: - """ - This class is used to make dealing with environment variables easier. It - allows you to set multiple environment variables at once, and to get items - with subscript notation without needing to deal with the particularities of - non-existent values. 
- """ - def __init__(self, **kwargs): - for (key, value) in kwargs.items(): - self[key] = value - - def setdefault(self, key, value): - if not self[key]: - self[key] = value - return value - return self[key] - - def setall(self, **kwargs): - result = {} - for (key, value) in kwargs.items(): - if self[key]: - result[key] = self[key] - self[key] = value - - def getany(self, *args): - result = {} - for arg in args: - if self[arg]: - result[arg] = self[arg] - return result - - def as_type(self, type, key): - if self[key]: - return type(self[key]) - return None - - def __getitem__(self, key): - return os.getenv(key) - - def __setitem__(self, key, value): - os.environ[key] = str(value) - - def __len__(self)-> int: - return len(os.environ) - - def __str__(self)-> str: - return str(os.environ) - - def __iter__(self): - return (item for item in os.environ.items()) \ No newline at end of file diff --git a/collectoss/api/view/server/__init__.py b/collectoss/api/view/server/__init__.py index e919a597a..98ce903be 100644 --- a/collectoss/api/view/server/__init__.py +++ b/collectoss/api/view/server/__init__.py @@ -1,2 +1 @@ -from .LoginException import LoginException -from .Environment import Environment \ No newline at end of file +from .LoginException import LoginException \ No newline at end of file diff --git a/collectoss/application/cli/__init__.py b/collectoss/application/cli/__init__.py index 8081d6a8e..444473016 100644 --- a/collectoss/application/cli/__init__.py +++ b/collectoss/application/cli/__init__.py @@ -10,7 +10,9 @@ from collectoss.application.db.engine import DatabaseEngine from collectoss.application.db import get_engine, dispose_database_engine -from sqlalchemy.exc import OperationalError +from sqlalchemy.exc import OperationalError +from collectoss.application.environment import SystemEnv + def check_connectivity(urls=["http://chaoss.community", "http://github.com", "http://gitlab.com"], timeout=10.0): @@ -65,11 +67,11 @@ def new_func(ctx, *args, **kwargs): 
return ctx.invoke(function_db_connection, *args, **kwargs) except OperationalError as e: - db_environment_var = os.getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") # determine the location to print in error string if db_environment_var: - location = f"the AUGUR_DB environment variable\nAUGUR_DB={os.getenv('AUGUR_DB')}" + location = f"the COLLECTOSS_DB environment variable\nCOLLECTOSS_DB={SystemEnv.get('COLLECTOSS_DB')}" else: with open("db.config.json", 'r') as f: db_config = json.load(f) diff --git a/collectoss/application/cli/_multicommand.py b/collectoss/application/cli/_multicommand.py index 13186e7bb..06aae01de 100644 --- a/collectoss/application/cli/_multicommand.py +++ b/collectoss/application/cli/_multicommand.py @@ -11,7 +11,7 @@ from pathlib import Path # import collectoss.application -CONTEXT_SETTINGS = dict(auto_envvar_prefix='AUGUR') +CONTEXT_SETTINGS = dict(auto_envvar_prefix='COLLECTOSS') class CLIMultiCommand(click.MultiCommand): def __commands_folder(self): diff --git a/collectoss/application/cli/api.py b/collectoss/application/cli/api.py index a8bb9e53b..0c567c590 100644 --- a/collectoss/application/cli/api.py +++ b/collectoss/application/cli/api.py @@ -17,6 +17,8 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext from collectoss.application.cli._cli_util import _broadcast_signal_to_processes, raise_open_file_limit, clear_redis_caches, clear_rabbitmq_messages from collectoss.application.db.lib import get_value +from collectoss.application.environment import SystemEnv + logger = SystemLogger("collectoss", reset_logfiles=False).get_logger() @@ -36,7 +38,7 @@ def start(ctx, development, port): """Start CollectOSS's backend server.""" try: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( @@ -46,7 +48,7 @@ def start(ctx, development, port): raise e 
if development: - os.environ["AUGUR_DEV"] = "1" + SystemEnv.set("AUGUR_DEV", "1") logger.info("Starting in development mode") try: @@ -142,7 +144,7 @@ def get_api_processes(): def is_api_process(process): command = ''.join(process.info['cmdline'][:]).lower() - if os.getenv('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: + if SystemEnv.get('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: if process.pid != os.getpid(): diff --git a/collectoss/application/cli/backend.py b/collectoss/application/cli/backend.py index 8add0ce18..173bc41a7 100644 --- a/collectoss/application/cli/backend.py +++ b/collectoss/application/cli/backend.py @@ -15,6 +15,7 @@ import requests from redis.exceptions import ConnectionError as RedisConnectionError +from collectoss.application.environment import SystemEnv from collectoss.tasks.start_tasks import collection_monitor, create_collection_status_records from collectoss.tasks.git.facade_tasks import clone_repos from collectoss.tasks.github.contributors import process_contributors @@ -29,9 +30,10 @@ from collectoss.application.cli import test_connection, test_db_connection, with_database, DatabaseContext import sqlalchemy as s +from collectoss.util.startup import check_init_schema, check_update_schema, collect_env_variables, print_platform_information, setup_facade_directory, warn_import_repos, merge_config from keyman.KeyClient import KeyClient, KeyPublisher -reset_logs = os.getenv("AUGUR_RESET_LOGS", 'True').lower() in ('true', '1', 't', 'y', 'yes') +reset_logs = SystemEnv.get_bool("AUGUR_RESET_LOGS", True) logger = SystemLogger("collectoss", reset_logfiles=reset_logs).get_logger() @@ -60,8 +62,23 @@ def start(ctx, disable_collection, development, pidfile, port): signal.signal(signal.SIGTERM, manager.shutdown_signal_handler) signal.signal(signal.SIGINT, manager.shutdown_signal_handler) + + collect_env_variables(logger) + + + check_init_schema() + check_update_schema() + + 
setup_facade_directory(logger) + + merge_config(ctx.obj.engine, logger) + + warn_import_repos(logger) + + print_platform_information(logger) + try: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": raise_open_file_limit(100000) except Exception as e: logger.error( @@ -71,10 +88,10 @@ def start(ctx, disable_collection, development, pidfile, port): raise e if development: - os.environ["AUGUR_DEV"] = "1" + SystemEnv.set("AUGUR_DEV", "1") logger.info("Starting in development mode") - os.environ["AUGUR_PIDFILE"] = pidfile + SystemEnv.set("AUGUR_PIDFILE", pidfile) try: gunicorn_location = os.getcwd() + "/collectoss/api/gunicorn_conf.py" @@ -86,10 +103,10 @@ def start(ctx, disable_collection, development, pidfile, port): if not port: port = get_value("Server", "port") - os.environ["AUGUR_PORT"] = str(port) + SystemEnv.set("AUGUR_PORT", str(port)) if disable_collection: - os.environ["AUGUR_DISABLE_COLLECTION"] = "1" + SystemEnv.set("AUGUR_DISABLE_COLLECTION", "1") core_worker_count = get_value("Celery", 'core_worker_count') secondary_worker_count = get_value("Celery", 'secondary_worker_count') @@ -130,7 +147,7 @@ def start(ctx, disable_collection, development, pidfile, port): processes = start_celery_worker_processes((core_worker_count, secondary_worker_count, facade_worker_count), disable_collection) manager.processes = processes - celery_beat_schedule_db = os.getenv("CELERYBEAT_SCHEDULE_DB", "celerybeat-schedule.db") + celery_beat_schedule_db = SystemEnv.get("CELERYBEAT_SCHEDULE_DB", "celerybeat-schedule.db") if os.path.exists(celery_beat_schedule_db): logger.info("Deleting old task schedule") os.remove(celery_beat_schedule_db) @@ -144,7 +161,7 @@ def start(ctx, disable_collection, development, pidfile, port): manager.keypub = keypub if not disable_collection: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": orchestrator = subprocess.Popen("python 
keyman/Orchestrator.py".split()) # Wait for orchestrator startup @@ -355,10 +372,10 @@ def export_env(config): Exports your GitHub key and database credentials """ - export_file = open(os.getenv('AUGUR_EXPORT_FILE', 'collectoss_export_env.sh'), 'w+') + export_file = open(SystemEnv.get('COLLECTOSS_EXPORT_FILE') or 'collectoss_export_env.sh', 'w+') export_file.write('#!/bin/bash') export_file.write('\n') - env_file = open(os.getenv('AUGUR_ENV_FILE', 'docker_env.txt'), 'w+') + env_file = open(SystemEnv.get('COLLECTOSS_ENV_FILE') or 'docker_env.txt', 'w+') for env_var in config.get_env_config().items(): if "LOG" not in env_var[0]: @@ -403,7 +420,7 @@ def get_backend_processes(): for process in psutil.process_iter(['cmdline', 'name', 'environ']): if process.info['cmdline'] is not None and process.info['environ'] is not None: try: - if os.getenv('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in ''.join(process.info['cmdline'][:]).lower(): + if SystemEnv.get('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in ''.join(process.info['cmdline'][:]).lower(): if process.pid != os.getpid(): process_list.append(process) except (KeyError, FileNotFoundError): diff --git a/collectoss/application/cli/collection.py b/collectoss/application/cli/collection.py index 78b6f5d13..adf4b50e8 100644 --- a/collectoss/application/cli/collection.py +++ b/collectoss/application/cli/collection.py @@ -14,6 +14,7 @@ import traceback import sqlalchemy as s +from collectoss.application.environment import SystemEnv from collectoss.tasks.start_tasks import collection_monitor, create_collection_status_records from collectoss.tasks.git.facade_tasks import clone_repos from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler @@ -45,7 +46,7 @@ def start(ctx, development): """Start CollectOSS's backend server.""" try: - if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": + if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": 
raise_open_file_limit(100000) except Exception as e: logger.error( @@ -75,7 +76,7 @@ def start(ctx, development): keypub.publish(key, "gitlab_rest") if development: - os.environ["AUGUR_DEV"] = "1" + SystemEnv.set("AUGUR_DEV", "1") logger.info("Starting in development mode") core_worker_count = get_value("Celery", 'core_worker_count') @@ -237,7 +238,7 @@ def get_collection_processes(): def is_collection_process(process): command = ''.join(process.info['cmdline'][:]).lower() - if os.getenv('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: + if SystemEnv.get('VIRTUAL_ENV') in process.info['environ']['VIRTUAL_ENV'] and 'python' in command: if process.pid != os.getpid(): if "collectossbackendcollection" in command or "celery_app.celery_appbeat" in command: diff --git a/collectoss/application/cli/config.py b/collectoss/application/cli/config.py index 2a9a09320..681c9d201 100644 --- a/collectoss/application/cli/config.py +++ b/collectoss/application/cli/config.py @@ -11,12 +11,14 @@ from collectoss.application.db.session import DatabaseSession from collectoss.application.config import SystemConfig, redact_setting_value from collectoss.application.cli import DatabaseContext, test_connection, test_db_connection, with_database -from collectoss.util.inspect_without_import import get_phase_names_without_import -ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) +from collectoss.util.startup import merge_config logger = logging.getLogger(__name__) -ENVVAR_PREFIX = "AUGUR_" +ENVVAR_PREFIX = "COLLECTOSS_" + +def get_transitional_envs(name: str) -> list: + return [ENVVAR_PREFIX + name, "AUGUR_" + name] @click.group('config', short_help='Generate an augur.config.json') @click.pass_context @@ -24,12 +26,12 @@ def cli(ctx): ctx.obj = DatabaseContext() @cli.command('init') -@click.option('--github-api-key', help="GitHub API key for data collection from the GitHub API", 
envvar=ENVVAR_PREFIX + 'GITHUB_API_KEY') -@click.option('--facade-repo-directory', help="Directory on the database server where Facade should clone repos", envvar=ENVVAR_PREFIX + 'FACADE_REPO_DIRECTORY') -@click.option('--gitlab-api-key', help="GitLab API key for data collection from the GitLab API", envvar=ENVVAR_PREFIX + 'GITLAB_API_KEY') -@click.option('--redis-conn-string', help="String to connect to redis cache", envvar=ENVVAR_PREFIX + 'REDIS_CONN_STRING') -@click.option('--rabbitmq-conn-string', help="String to connect to rabbitmq broker", envvar=ENVVAR_PREFIX + 'RABBITMQ_CONN_STRING') -@click.option('--logs-directory', help="Directory to store logs", envvar=ENVVAR_PREFIX + 'LOGS_DIRECTORY') +@click.option('--github-api-key', help="GitHub API key for data collection from the GitHub API", envvar=get_transitional_envs('GITHUB_API_KEY')) +@click.option('--facade-repo-directory', help="Directory on the database server where Facade should clone repos", envvar=get_transitional_envs('FACADE_REPO_DIRECTORY')) +@click.option('--gitlab-api-key', help="GitLab API key for data collection from the GitLab API", envvar=get_transitional_envs('GITLAB_API_KEY')) +@click.option('--redis-conn-string', help="String to connect to redis cache", envvar=get_transitional_envs('REDIS_CONN_STRING')) +@click.option('--rabbitmq-conn-string', help="String to connect to rabbitmq broker", envvar=get_transitional_envs('RABBITMQ_CONN_STRING')) +@click.option('--logs-directory', help="Directory to store logs", envvar=get_transitional_envs('LOGS_DIRECTORY')) @test_connection @test_db_connection @with_database @@ -58,52 +60,8 @@ def init_config(ctx, github_api_key, facade_repo_directory, gitlab_api_key, redi if facade_repo_directory[-1] != "/": facade_repo_directory += "/" - - keys = {} - - keys["github_api_key"] = github_api_key - keys["gitlab_api_key"] = gitlab_api_key - - with DatabaseSession(logger, engine=ctx.obj.engine) as session: - - config = SystemConfig(logger, session) - - 
augmented_config = config.base_config - - phase_names = get_phase_names_without_import() - - #Add all phases as enabled by default - for name in phase_names: - - if name not in augmented_config['Task_Routine']: - augmented_config['Task_Routine'].update({name : 1}) - - #print(default_config) - if redis_conn_string: - - try: - redis_string_array = redis_conn_string.split("/") - cache_number = int(redis_string_array[-1]) - digits = len(str(cache_number)) - - redis_conn_string = redis_conn_string[:-digits] - - except ValueError: - pass - - augmented_config["Redis"]["connection_string"] = redis_conn_string - - if rabbitmq_conn_string: - augmented_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string - - augmented_config["Keys"] = keys - - augmented_config["Facade"]["repo_directory"] = facade_repo_directory - - augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_PROJECT_REPO_DIRECTORY + "/logs/") - - config.load_config_from_dict(augmented_config) - + merge_config(ctx.obj.engine, logger, github_api_key, facade_repo_directory, gitlab_api_key, redis_conn_string, rabbitmq_conn_string, logs_directory) + @cli.command('load') @click.option('--file', required=True) diff --git a/collectoss/application/cli/db.py b/collectoss/application/cli/db.py index 25ea8a88e..7b6bc7c09 100644 --- a/collectoss/application/cli/db.py +++ b/collectoss/application/cli/db.py @@ -28,6 +28,8 @@ process_repo_csv, process_repo_group_csv, ) +from collectoss.application.environment import SystemEnv +from collectoss.util.startup import check_init_schema, check_update_schema logger = logging.getLogger(__name__) @@ -290,7 +292,7 @@ def upgrade_db_version(): """ Upgrade the configured database to the latest version """ - check_call(["alembic", "upgrade", "head"]) + check_update_schema() @cli.command("check-for-upgrade") @@ -310,7 +312,8 @@ def create_schema(): """ Create schema in the configured database """ - check_call(["alembic", "upgrade", "head"]) + # check_call(["alembic", 
"upgrade", "head"]) + check_init_schema() def generate_key(length): @@ -379,7 +382,7 @@ def get_api_key(ctx): short_help="Check the ~/.pgpass file for CollectOSS's database credentials", ) def check_pgpass(): - db_environment_var = getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") if db_environment_var: # gets the user, passowrd, host, port, and database_name out of environment variable # assumes database string of structure //:@:/ @@ -495,7 +498,7 @@ def run_psql_command_in_database(target_type, target): logger.error("Invalid target type. Exiting...") exit(1) - db_environment_var = getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") # db_json_file_location = os.getcwd() + "/db.config.json" # db_json_exists = os.path.exists(db_json_file_location) diff --git a/collectoss/application/config.py b/collectoss/application/config.py index 56e6c57ae..16f62b5ad 100644 --- a/collectoss/application/config.py +++ b/collectoss/application/config.py @@ -7,6 +7,8 @@ from collectoss.application.db.models import Config from collectoss.application.db.util import execute_session_query, convert_type_of_value from pathlib import Path +from collectoss.application.environment import SystemEnv + import logging def get_development_flag_from_config(): @@ -27,7 +29,7 @@ def get_development_flag_from_config(): return flag def get_development_flag(): - return os.getenv("AUGUR_DEV") or get_development_flag_from_config() or False + return SystemEnv.get("COLLECTOSS_DEV") or get_development_flag_from_config() or False def redact_setting_value(section_name, setting_name, value): value_redacted = value if section_name != "Keys" else "REDACTED" @@ -167,7 +169,7 @@ def __init__(self, logger, session: DatabaseSession, config_sources: list = None JsonConfig(default_config, logger) ] - config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_dir = Path(SystemEnv.get("CONFIG_DATADIR") or "./") config_path = config_dir.joinpath("augur.json") if 
config_path.exists(): config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) diff --git a/collectoss/application/db/engine.py b/collectoss/application/db/engine.py index e00c3c992..884d5a61c 100644 --- a/collectoss/application/db/engine.py +++ b/collectoss/application/db/engine.py @@ -7,6 +7,7 @@ from sqlalchemy import create_engine, event from sqlalchemy.engine import Engine +from collectoss.application.environment import SystemEnv from collectoss.application.db.util import catch_operational_error @@ -61,7 +62,7 @@ def get_database_string() -> str: postgres database string """ - db_environment_var = os.getenv("AUGUR_DB") + db_environment_var = SystemEnv.get("COLLECTOSS_DB") try: current_dir = os.getcwd() @@ -74,7 +75,7 @@ def get_database_string() -> str: if not db_environment_var and not db_json_exists: - print("ERROR no way to get connection to the database. \n\t\t\t\t\t\t There is no db.config.json and the AUGUR_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the AUGUR_DB environment then run make install") + print("ERROR no way to get connection to the database. 
"""Centralized, prefix-aware access to environment variables.

Supports the migration of environment variable names from the legacy
``AUGUR_`` prefix to the ``COLLECTOSS_`` prefix: a lookup for either name
transparently checks both, preferring the new one.
"""
from typing import Optional
import logging
import os
import warnings

logger = logging.getLogger(__name__)


def extract_prefix(key: str, prefixes: list[str], separator: str = "_") -> Optional[str]:
    """Detect and return the prefix present on the provided key.

    Matching is case-insensitive; the returned text is sliced from ``key`` so
    it keeps the caller's original casing.

    Args:
        key (str): the key to inspect
        prefixes (list[str]): the prefixes to look for, checked in order
        separator (str, optional): the separator between elements of the key,
            included in the result when it follows the prefix. Defaults to "_".

    Returns:
        Optional[str]: the detected prefix (including the trailing separator
        when the key continues past the prefix), otherwise None
    """
    upper_key = key.upper()
    for prefix in prefixes:
        upper_prefix = prefix.upper()
        # The key is nothing but the prefix: there is no separator to include.
        if upper_key == upper_prefix:
            return key[:len(prefix)]
        if upper_key.startswith(upper_prefix + separator):
            return key[:len(prefix) + len(separator)]
    return None


class SystemEnv:
    """Centralized environment variable access.

    Built for enabling migration of environment variable names.
    """

    # Prefixes probed on every lookup, in priority order.
    _prefixes = ["COLLECTOSS", "AUGUR"]
    # Prefixes that still work but trigger a deprecation warning when used.
    _warn_prefixes = ["AUGUR"]
    _separator = "_"

    @classmethod
    def get(cls, key: str, default: Optional[str] = None,
            prefixes: Optional[list[str]] = None) -> Optional[str]:
        """Look up ``key`` in the environment, trying every known prefix.

        Any recognised prefix on ``key`` is stripped, then each prefix in
        ``prefixes`` is tried in order and the first variable that is set wins.
        A key without a recognised prefix is additionally looked up verbatim.

        Args:
            key (str): variable name, with or without a known prefix
            default (Optional[str]): value returned when nothing is set
            prefixes (Optional[list[str]]): prefixes to probe; defaults to the
                class-level ``_prefixes`` resolved at call time

        Returns:
            Optional[str]: the value found, otherwise ``default``
        """
        if prefixes is None:
            prefixes = cls._prefixes

        # extract the suffix so we can try multiple prefixes
        canonical_prefix = extract_prefix(key, prefixes, cls._separator)
        suffix = key[len(canonical_prefix):] if canonical_prefix is not None else key

        # check prefixes in order and use the first one that has a value
        for prefix in prefixes:
            check_key = f"{prefix}{cls._separator}{suffix}"
            value = os.getenv(check_key)
            if value is None:
                continue

            if prefix in cls._warn_prefixes:
                # Recommend the first non-deprecated spelling. Echoing `key`
                # is wrong because callers may pass the deprecated name itself.
                preferred = next(
                    (
                        f"{candidate}{cls._separator}{suffix}"
                        for candidate in prefixes
                        if candidate not in cls._warn_prefixes
                    ),
                    key,
                )
                msg = (
                    f"Environment variable '{check_key}' is deprecated. "
                    f"Use '{preferred}' instead. This automatic recovery may "
                    "become a failure in a future version."
                )
                logger.warning(msg)
                warnings.warn(msg, DeprecationWarning, stacklevel=2)

            return value

        # Unprefixed keys (e.g. VIRTUAL_ENV) fall back to a verbatim lookup.
        if not canonical_prefix:
            return os.getenv(key, default)

        return default

    @classmethod
    def get_bool(cls, key: str, default: bool,
                 prefixes: Optional[list[str]] = None) -> bool:
        """Get a value from the environment and cast it to a boolean.

        Returns ``default`` when the variable is unset; otherwise True only for
        'true', '1', 't', 'y' or 'yes' (case-insensitive).
        """
        raw_val = cls.get(key, None, prefixes)
        if raw_val is None:
            return default
        return raw_val.lower() in ('true', '1', 't', 'y', 'yes')

    @classmethod
    def set(cls, key: str, value: str, overwrite: bool = True) -> None:
        """Set ``key`` (used verbatim, no prefix handling) in ``os.environ``.

        When ``overwrite`` is False an already-set variable is left untouched.
        """
        if os.getenv(key) is not None and not overwrite:
            return

        os.environ[key] = value

    @classmethod
    def set_default(cls, key: str, value: str) -> str:
        """Return the current value of ``key``, setting it to ``value`` if unset.

        The lookup is prefix-aware (see ``get``); the write uses ``key``
        verbatim. The environment is read once so that a deprecated name
        produces a single warning.
        """
        existing = cls.get(key)
        if existing is None:
            cls.set(key, value)
            return value
        return existing
+from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value +from collectoss.application.environment import SystemEnv from collectoss.tasks.github.util.github_api_key_handler import GithubApiKeyHandler from collectoss.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call @@ -79,19 +80,11 @@ def generate_scorecard(logger, repo_git): command = '--repo=' + path #this is path where our scorecard project is located - path_to_scorecard = os.getenv('SCORECARD_DIR', os.environ['HOME'] + '/scorecard') + path_to_scorecard = SystemEnv.get('SCORECARD_DIR', os.path.expanduser('~/scorecard')) #setting the environmental variable which is required by scorecard - - with get_session() as session: - #key_handler = GithubRandomKeyAuth(logger) - key_handler = GithubApiKeyHandler(logger) - os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key() - - # This seems outdated - #setting the environmental variable which is required by scorecard - #key_handler = GithubApiKeyHandler(session, session.logger) - #os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key() + key_handler = GithubApiKeyHandler(logger) + SystemEnv.set('GITHUB_AUTH_TOKEN', key_handler.get_random_key()) try: required_output = parse_json_from_subprocess_call(logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard) diff --git a/collectoss/tasks/git/scc_value_tasks/core.py b/collectoss/tasks/git/scc_value_tasks/core.py index 7c9e0bafd..770165522 100644 --- a/collectoss/tasks/git/scc_value_tasks/core.py +++ b/collectoss/tasks/git/scc_value_tasks/core.py @@ -2,6 +2,7 @@ import os from collectoss.application.db.models import * from collectoss.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value +from collectoss.application.environment import SystemEnv from collectoss.tasks.util.worker_util import parse_json_from_subprocess_call from 
collectoss.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path @@ -20,7 +21,7 @@ def value_model(logger,repo_git): logger.info(f"Repo ID: {repo_id}, Path: {path}") logger.info('Running scc...') - path_to_scc = os.getenv('SCC_DIR', os.environ['HOME'] + '/scc') + path_to_scc = SystemEnv.get('SCC_DIR', os.path.expanduser('~/scc')) required_output = parse_json_from_subprocess_call(logger,['./scc', '-f','json','--by-file', path], cwd=path_to_scc) diff --git a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py index 7da6495bd..2b536a3a4 100644 --- a/collectoss/tasks/git/util/facade_worker/facade_worker/config.py +++ b/collectoss/tasks/git/util/facade_worker/facade_worker/config.py @@ -40,11 +40,13 @@ from collectoss.application.db.lib import execute_sql from logging import Logger +from collectoss.application.environment import SystemEnv + logger = logging.getLogger(__name__) def get_database_args_from_env(): - db_str = os.getenv("AUGUR_DB") + db_str = SystemEnv.get("COLLECTOSS_DB") try: db_json_file_location = os.getcwd() + "/db.config.json" except FileNotFoundError: @@ -55,7 +57,7 @@ def get_database_args_from_env(): if not db_str and not db_json_exists: - logger.error("ERROR no way to get connection to the database. \n\t\t\t\t\t\t There is no db.config.json and the AUGUR_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the AUGUR_DB environment then run make install") + logger.error("ERROR no way to get connection to the database. 
\n\t\t\t\t\t\t There is no db.config.json and the COLLECTOSS_DB environment variable is not set\n\t\t\t\t\t\t Please run make install or set the COLLECTOSS_DB environment then run make install") sys.exit() credentials = {} diff --git a/collectoss/tasks/init/celery_app.py b/collectoss/tasks/init/celery_app.py index e14230f99..4b10af18a 100644 --- a/collectoss/tasks/init/celery_app.py +++ b/collectoss/tasks/init/celery_app.py @@ -17,6 +17,7 @@ from collectoss.application.db import get_engine from collectoss.application.db.lib import get_session from collectoss.application.config import SystemConfig +from collectoss.application.environment import SystemEnv from collectoss.tasks.init import get_redis_conn_values, get_rabbitmq_conn_string from collectoss.application.db.models import Repo from collectoss.tasks.util.collection_state import CollectionState @@ -63,7 +64,7 @@ tasks = start_tasks + github_tasks + gitlab_tasks + git_tasks + materialized_view_tasks + frontend_tasks -if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": +if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": tasks += data_analysis_tasks redis_db_number, redis_conn_string = get_redis_conn_values() diff --git a/collectoss/tasks/start_tasks.py b/collectoss/tasks/start_tasks.py index 1f36dd90b..51bf25cd7 100644 --- a/collectoss/tasks/start_tasks.py +++ b/collectoss/tasks/start_tasks.py @@ -14,7 +14,8 @@ from collectoss.tasks.github.pull_requests.tasks import * from collectoss.tasks.github.repo_info.tasks import * from collectoss.tasks.github.releases.tasks import * -if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": +from collectoss.application.environment import SystemEnv +if SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') != "1": from collectoss.tasks.data_analysis import * from collectoss.tasks.github.detect_move.tasks import detect_github_repo_move_core, detect_github_repo_move_secondary from collectoss.tasks.github.releases.tasks import collect_releases @@ -38,7 +39,7 @@ from collectoss.application.db.lib 
import execute_sql, get_session from collectoss.application.config import SystemConfig -RUNNING_DOCKER = os.environ.get('AUGUR_DOCKER_DEPLOY') == "1" +RUNNING_DOCKER = SystemEnv.get('COLLECTOSS_DOCKER_DEPLOY') == "1" CELERY_GROUP_TYPE = type(group()) CELERY_CHAIN_TYPE = type(chain()) diff --git a/collectoss/util/startup.py b/collectoss/util/startup.py new file mode 100644 index 000000000..8fe5b2374 --- /dev/null +++ b/collectoss/util/startup.py @@ -0,0 +1,244 @@ +## Startup helpers + + +from pathlib import Path +import os +import getpass +import subprocess +from subprocess import check_call +import platform +import sys + +from sqlalchemy.orm.attributes import get_history +from collectoss.application.config import SystemConfig +from collectoss.application.db.session import DatabaseSession +from collectoss.application.environment import SystemEnv +from typing_extensions import deprecated + +from collectoss.util.inspect_without_import import get_phase_names_without_import + +ROOT_PROJECT_REPO_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) + +def check_init_schema(): + """Initialize the CollectOSS database schema as appropriate + """ + + pass + # does public.alembic_version exist? 
+ # if yes, do nothing + # if no, do a sanity check to make sure the other schemas dont exist, + # then init the current db with sqlalchemy and stamp the current version with alembic + +def check_update_schema(): + """ensure the CollectOSS schema is on the latest version + """ + pass + # alembic upgrade head, unless theres an env var preventing automatic migration + check_call(["alembic", "upgrade", "head"]) + +def collect_env_variables(logger): + """convenience helper for assembling more complex environment variables out of smaller ones + and other environment variable convenience operations + """ + + if SystemEnv.get("COLLECTOSS_DB") is None: + names = ["COLLECTOSS_DB_HOST", "COLLECTOSS_DB_USER", "COLLECTOSS_DB_PASSWORD", "COLLECTOSS_DB_NAME"] + values = [SystemEnv.get(n) for n in names] + + if all(map(lambda p: p is not None, values)): + host, user, passwd, name = values + logger.debug(f"Assembling COLLECTOSS_DB string from provided variables") + SystemEnv.set("COLLECTOSS_DB", f"postgresql+psycopg2://{user}:{passwd}@{host}/{name}") + else: + logger.warning("CollectOSS was unable to create your database connection string automatically") + logger.warning("The following environment variables are missing:") + for n, v in zip(names, values): + if v is None: + logger.warning(n) + + + + db_string = SystemEnv.get("COLLECTOSS_DB") + if db_string and "localhost" in db_string: + logger.debug(f"Swapping localhost in COLLECTOSS_DB string with docker host gateway name") + SystemEnv.set("COLLECTOSS_DB", db_string.replace("localhost", "host.docker.internal")) + elif db_string and "127.0.0.1" in db_string: + logger.debug(f"Swapping 127.0.0.1 in COLLECTOSS_DB string with docker host gateway name") + SystemEnv.set("COLLECTOSS_DB", db_string.replace("127.0.0.1", "host.docker.internal")) + + redis_string = SystemEnv.get("REDIS_CONN_STRING") + if redis_string and "localhost" in redis_string: + logger.debug(f"Swapping localhost in REDIS_CONN_STRING with docker host gateway name") + 
SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("localhost", "host.docker.internal")) + elif redis_string and "127.0.0.1" in redis_string: + logger.debug(f"Swapping 127.0.0.1 in REDIS_CONN_STRING with docker host gateway name") + SystemEnv.set("REDIS_CONN_STRING", redis_string.replace("127.0.0.1", "host.docker.internal")) + + + # if user didnt specify gitlab credentials, just inject fake ones so we can start up. + if SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") is None: + logger.debug(f"Detected no specified gitlab key, using made up values as a workaround") + SystemEnv.set("COLLECTOSS_GITLAB_API_KEY", "fake") + if SystemEnv.get("COLLECTOSS_GITLAB_USERNAME") is None: + logger.debug(f"Detected no specified gitlab username, using made up value as a workaround") + SystemEnv.set("COLLECTOSS_GITLAB_USERNAME", "fake") + + # provide a default value for the facade repo directory (assumes docker paths) + facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + if facade_repo_directory is None: + logger.debug(f"Setting default value for COLLECTOSS_FACADE_REPO_DIRECTORY") + SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", "/collectoss/facade/") + else: + # Check if the path is resolveable/make it absolute + logger.debug(f"Resolving full path to COLLECTOSS_FACADE_REPO_DIRECTORY") + SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", str(Path(facade_repo_directory).resolve(strict=True))) + + # ensure trailing slash is present + facade_repo_directory = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + if facade_repo_directory and not facade_repo_directory.endswith("/"): + facade_repo_directory += "/" + SystemEnv.set("COLLECTOSS_FACADE_REPO_DIRECTORY", facade_repo_directory) + +@deprecated("The bulk of this function is handling .git-credentials, which will be replaced with pygit2 (see issue #258)", category=None) +def setup_facade_directory(logger): + """Perform permission checks and create the facade directory if it doesnt exist + """ + + 
facade_directory_path = SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") or "/collectoss/facade/" + + facade_directory = Path(facade_directory_path) + + if not facade_directory.exists(): + logger.debug(f"Specified facade directory {facade_directory_path} does not exist. Creating...") + facade_directory.mkdir(parents=True, exist_ok=True) # also create missing parent directories + + git_credentials = facade_directory.joinpath(".git-credentials") + git_credentials.touch(mode=0o600, exist_ok=True) # holds plaintext tokens: owner-only when newly created + + if not os.access(git_credentials, os.W_OK): # the file is opened for writing below + logger.error(f"User {getpass.getuser()} does not have permission to write to {git_credentials}. Please select another location") + else: + logger.debug(f"Permission check passed for {git_credentials}") + + + credentials = [] + + gh_names = ["COLLECTOSS_GITHUB_USERNAME","COLLECTOSS_GITHUB_API_KEY"] + gh_values = [SystemEnv.get(n) for n in gh_names] + + if all(map(lambda p: p is not None, gh_values)): + user, key = gh_values + credentials.append(f"https://{user}:{key}@github.com") + + + gl_names = ["COLLECTOSS_GITLAB_USERNAME","COLLECTOSS_GITLAB_API_KEY"] + gl_values = [SystemEnv.get(n) for n in gl_names] + + if all(map(lambda p: p is not None, gl_values)): + user, key = gl_values + credentials.append(f"https://{user}:{key}@gitlab.com") + + with git_credentials.open("w", encoding="utf-8") as c: + c.writelines(f"{entry}\n" for entry in credentials) # writelines() adds no separators; one URL per line + + subprocess.call(["git", "config", "--global", "credential.helper", f"store --file {str(git_credentials)}"]) + + +def merge_config( + engine, + logger, + github_api_key:str | None = None, + facade_repo_directory:str | None = None, + gitlab_api_key:str | None = None, + redis_conn_string:str | None = None, + rabbitmq_conn_string:str | None = None, + logs_directory:str | None = None + ): + """Merge config items provided via environment variables into a place where SystemConfig can pick them up. 
+ + Args: + engine: the database engine to connect to + logger: object to use for outputting logging messages + github_api_key (str): config value + facade_repo_directory (str): config value + gitlab_api_key (str): config value + redis_conn_string (str): config value + rabbitmq_conn_string (str): config value + logs_directory (str): config value + """ + + github_api_key = github_api_key or SystemEnv.get("COLLECTOSS_GITHUB_API_KEY") + facade_repo_directory = facade_repo_directory or SystemEnv.get("COLLECTOSS_FACADE_REPO_DIRECTORY") + gitlab_api_key = gitlab_api_key or SystemEnv.get("COLLECTOSS_GITLAB_API_KEY") + redis_conn_string = redis_conn_string or SystemEnv.get("REDIS_CONN_STRING") + rabbitmq_conn_string = rabbitmq_conn_string or SystemEnv.get("RABBITMQ_CONN_STRING") + logs_directory = logs_directory or SystemEnv.get("COLLECTOSS_LOGS_DIRECTORY") + + keys = {} + + keys["github_api_key"] = github_api_key + keys["gitlab_api_key"] = gitlab_api_key + + with DatabaseSession(logger, engine=engine) as session: + + config = SystemConfig(logger, session) + + augmented_config = config.base_config + + phase_names = get_phase_names_without_import() + + #Add all phases as enabled by default + for name in phase_names: + + if name not in augmented_config['Task_Routine']: + augmented_config['Task_Routine'].update({name : 1}) + + #print(default_config) + if redis_conn_string: + + try: + redis_string_array = redis_conn_string.split("/") + cache_number = int(redis_string_array[-1]) + digits = len(str(cache_number)) + + redis_conn_string = redis_conn_string[:-digits] + + except ValueError: + pass + + augmented_config["Redis"]["connection_string"] = redis_conn_string + + if rabbitmq_conn_string: + augmented_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string + + augmented_config["Keys"] = keys + + augmented_config["Facade"]["repo_directory"] = facade_repo_directory + + augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_PROJECT_REPO_DIRECTORY + 
"/logs/") + + config.load_config_from_dict(augmented_config) + + +@deprecated("automatic import is deprecated. This is a function to warn users and help them transition") +def warn_import_repos(logger): + """We are choosing not to auto import repos and repo groups automatically + This function detects attempts to use the automatic feature and warns users to use the CLI themselves + + Args: + logger: the logger to use + """ + + if Path("/repo_groups.csv").exists(): + logger.warning("Detected /repo_groups.csv file at startup. Automatic import of repo groups is deprecated.") + logger.warning("To import repo groups from a CSV, use the CLI: collectoss db add-repo-groups /repo_groups.csv") + + if Path("/repos.csv").exists(): + logger.warning("Detected /repos.csv file at startup. Automatic import of repos is deprecated.") + logger.warning("To import repos from a CSV, use the CLI: collectoss db add-repos /repos.csv") + + +def print_platform_information(logger): + logger.info(f"PATH: {os.environ.get('PATH')}") + logger.info(f"Python executable (current): {sys.executable}") + logger.info(f"Python version: {platform.python_version()}") \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 55e1127be..82268e7e5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,11 +7,11 @@ services: restart: unless-stopped environment: - "POSTGRES_DB=augur" - - "POSTGRES_USER=${AUGUR_DB_USER:-augur}" - - "POSTGRES_PASSWORD=${AUGUR_DB_PASSWORD:-augur}" + - "POSTGRES_USER=${COLLECTOSS_DB_USER:-augur}" + - "POSTGRES_PASSWORD=${COLLECTOSS_DB_PASSWORD:-augur}" - "PGDATA=/var/lib/postgresql/data/pgdata" ports: - - "${AUGUR_DB_PORT:-5432}:5432" + - "${COLLECTOSS_DB_PORT:-5432}:5432" volumes: - augurpostgres:/var/lib/postgresql/data @@ -36,15 +36,15 @@ services: context: . 
dockerfile: ./docker/rabbitmq/Dockerfile args: - - RABBIT_MQ_DEFAULT_USER=${AUGUR_RABBITMQ_USERNAME:-augur} - - RABBIT_MQ_DEFAULT_PASSWORD=${AUGUR_RABBITMQ_PASSWORD:-password123} - - RABBIT_MQ_DEFAULT_VHOST=${AUGUR_RABBITMQ_VHOST:-collectoss_vhost} + - RABBIT_MQ_DEFAULT_USER=${COLLECTOSS_RABBITMQ_USERNAME:-augur} + - RABBIT_MQ_DEFAULT_PASSWORD=${COLLECTOSS_RABBITMQ_PASSWORD:-password123} + - RABBIT_MQ_DEFAULT_VHOST=${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} core: image: collectoss:latest build: context: . - dockerfile: ./docker/backend/${AUGUR_TARGET:-Dockerfile} + dockerfile: ./docker/backend/${COLLECTOSS_TARGET:-Dockerfile} volumes: - cache:/cache:rw - config:/config:rw @@ -56,16 +56,18 @@ services: #extra_hosts: # - "host.docker.internal:host-gateway" #Be able to ping services on the local machine environment: - - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@database:5432/augur" - - "AUGUR_DB_SCHEMA_BUILD=1" - - AUGUR_FACADE_REPO_DIRECTORY=/facade - - "AUGUR_FLAGS=$AUGUR_FLAGS" - - "AUGUR_GITHUB_API_KEY=${AUGUR_GITHUB_API_KEY}" - - "AUGUR_GITLAB_API_KEY=${AUGUR_GITLAB_API_KEY}" - - "AUGUR_GITHUB_USERNAME=${AUGUR_GITHUB_USERNAME}" - - "AUGUR_GITLAB_USERNAME=${AUGUR_GITLAB_USERNAME}" + - "COLLECTOSS_DB=postgresql+psycopg2://${COLLECTOSS_DB_USER:-augur}:${COLLECTOSS_DB_PASSWORD:-augur}@database:5432/augur" + - "COLLECTOSS_DB_SCHEMA_BUILD=1" + - COLLECTOSS_FACADE_REPO_DIRECTORY=/facade + - COLLECTOSS_LOGS_DIRECTORY=/logs + - "COLLECTOSS_FLAGS=$COLLECTOSS_FLAGS" + - "COLLECTOSS_GITHUB_API_KEY=${COLLECTOSS_GITHUB_API_KEY}" + - "COLLECTOSS_GITLAB_API_KEY=${COLLECTOSS_GITLAB_API_KEY}" + - "COLLECTOSS_GITHUB_USERNAME=${COLLECTOSS_GITHUB_USERNAME}" + - "COLLECTOSS_GITLAB_USERNAME=${COLLECTOSS_GITLAB_USERNAME}" + - COLLECTOSS_DOCKER_DEPLOY=1 - REDIS_CONN_STRING=redis://redis:6379 - - 
RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-collectoss_vhost} + - RABBITMQ_CONN_STRING=amqp://${COLLECTOSS_RABBITMQ_USERNAME:-augur}:${COLLECTOSS_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} - CONFIG_LOCATION=/config/config.yml - CONFIG_DATADIR=/config - CACHE_DATADIR=/cache @@ -92,9 +94,9 @@ services: # ports: # - 5555:5555 # environment: - # - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@database:5432/augur" + # - "COLLECTOSS_DB=postgresql+psycopg2://${COLLECTOSS_DB_USER:-augur}:${COLLECTOSS_DB_PASSWORD:-augur}@database:5432/augur" # - REDIS_CONN_STRING=redis://redis:6379 - # - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-collectoss_vhost} + # - RABBITMQ_CONN_STRING=amqp://${COLLECTOSS_RABBITMQ_USERNAME:-augur}:${COLLECTOSS_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${COLLECTOSS_RABBITMQ_VHOST:-collectoss_vhost} # depends_on: # - core # - database diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index d3ada5bf0..65b2ccdbb 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -33,6 +33,8 @@ LABEL org.opencontainers.image.revision=${REVISION} ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" +ENV COLLECTOSS_DOCKER_DEPLOY="1" + RUN set -x \ && apt-get update \ && apt-get -y install --no-install-recommends \ @@ -111,9 +113,4 @@ RUN ${SCORECARD_DIR}/scorecard version RUN mkdir -p repos/ logs/ /collectoss/facade/ RUN ln -s /cache /collectoss/collectoss/static/cache -# Copy in the entrypoint and init scripts, ensuring they are executable -COPY --chmod=755 ./docker/backend/entrypoint.sh / -COPY --chmod=755 ./docker/backend/init.sh / -RUN chmod +x /entrypoint.sh /init.sh -ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] -CMD 
["/init.sh"] +CMD ["collectoss", "backend", "start", "--pidfile", "/tmp/main.pid"] diff --git a/docker/backend/entrypoint.sh b/docker/backend/entrypoint.sh deleted file mode 100644 index 78eda49e2..000000000 --- a/docker/backend/entrypoint.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#SPDX-License-Identifier: MIT -set -e - -if [[ -z "$AUGUR_DB" ]]; then - # If AUGUR_DB is not set, check for individual environment variables and construct AUGUR_DB connection string - if [[ -n "$AUGUR_DB_HOST" ]] && [[ -n "$AUGUR_DB_USER" ]] && [[ -n "$AUGUR_DB_PASSWORD" ]] && [[ -n "$AUGUR_DB_NAME" ]]; then - export AUGUR_DB="postgresql+psycopg2://${AUGUR_DB_USER}:${AUGUR_DB_PASSWORD}@${AUGUR_DB_HOST}/${AUGUR_DB_NAME}" - fi -fi - - -if [[ "$AUGUR_DB" == *"localhost"* ]]; then - echo "localhost db connection" - export AUGUR_DB="${AUGUR_DB/localhost/host.docker.internal}" -elif [[ "$AUGUR_DB" == *"127.0.0.1"* ]]; then - echo "localhost db connection" - export AUGUR_DB="${AUGUR_DB/127.0.0.1/host.docker.internal}" -fi - -export AUGUR_FACADE_REPO_DIRECTORY=${AUGUR_FACADE_REPO_DIRECTORY:-/collectoss/facade/} -export AUGUR_DOCKER_DEPLOY="1" - -#Deal with special case where 'localhost' is the machine that started the container -if [[ "$REDIS_CONN_STRING" == *"localhost"* ]] || [[ "$REDIS_CONN_STRING" == *"127.0.0.1"* ]]; then - echo "localhost redis connection" - export redis_conn_string="redis://host.docker.internal:6379" -else - export redis_conn_string=$REDIS_CONN_STRING -fi - -exec "$@" diff --git a/docker/backend/init.sh b/docker/backend/init.sh deleted file mode 100644 index 782b8fa53..000000000 --- a/docker/backend/init.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -#SPDX-License-Identifier: MIT -set -e - -if [[ "$AUGUR_DB_SCHEMA_BUILD" == "1" ]]; then - collectoss db create-schema -fi - - -if [ ! 
-v AUGUR_NO_CONFIG ]; then - ./scripts/docker/config.sh docker -fi - -if [[ -f /repo_groups.csv ]]; then - collectoss db add-repo-groups /repo_groups.csv -fi - -if [[ -f /repos.csv ]]; then - collectoss db add-repos /repos.csv -fi - -echo "PATH: $PATH" -echo "Python executable: $(which python)" -python --version - -exec collectoss backend start --pidfile /tmp/main.pid diff --git a/docs/source/deployment/production.rst b/docs/source/deployment/production.rst index e65a987f1..186a38c4c 100644 --- a/docs/source/deployment/production.rst +++ b/docs/source/deployment/production.rst @@ -11,11 +11,11 @@ Environment Variables CollectOSS uses several environment variables in production. Make sure to configure the ones relevant to your deployment: -- ``AUGUR_RESET_LOGS`` : Controls automatic log reset on server startup -- ``AUGUR_DB`` : PostgreSQL database connection string (used if variable not set) +- ``COLLECTOSS_RESET_LOGS`` : Controls automatic log reset on server startup +- ``COLLECTOSS_DB`` : PostgreSQL database connection string (the default Docker database is used if the variable is not set) -AUGUR_RESET_LOGS ----------------- +COLLECTOSS_RESET_LOGS +--------------------- **Description:** Controls whether CollectOSS resets its log files every time the server starts. Useful for managing log size or integrating with external log rotation systems. @@ -27,7 +27,7 @@ boolean `True` : CollectOSS clears old logs at startup. **Environment Variable:** -AUGUR_RESET_LOGS +COLLECTOSS_RESET_LOGS **Notes:** If set to `False`, CollectOSS will not reset logs automatically. Administrators must ensure log rotation or cleanup is handled manually. @@ -36,10 +36,10 @@ If set to `False`, CollectOSS will not reset logs automatically. Administrators .. code-block:: bash - export AUGUR_RESET_LOGS=False + export COLLECTOSS_RESET_LOGS=False -AUGUR_DB --------- +COLLECTOSS_DB +------------- **Description:** Specifies the connection string for the PostgreSQL database used by CollectOSS. 
If omitted, the default Docker database is used. @@ -48,10 +48,10 @@ Specifies the connection string for the PostgreSQL database used by CollectOSS. string **Default:** -Docker container database (if `AUGUR_DB` is not specified) +Docker container database (if `COLLECTOSS_DB` is not specified) **Environment Variable:** -AUGUR_DB +COLLECTOSS_DB Related Resources ----------------- diff --git a/docs/source/development-guide/create-a-metric/api-development.rst b/docs/source/development-guide/create-a-metric/api-development.rst index 8aea48aac..12a010465 100644 --- a/docs/source/development-guide/create-a-metric/api-development.rst +++ b/docs/source/development-guide/create-a-metric/api-development.rst @@ -11,13 +11,13 @@ JSON Metrics are here: .. code-block:: bash - $ AUGUR_HOME/collectoss/metrics + $ COLLECTOSS_HOME/collectoss/metrics Visualization Metrics are here: .. code-block:: bash - $ AUGUR_HOME/collectoss/routes + $ COLLECTOSS_HOME/collectoss/routes Existing metrics files (JSON Metric) "Standard Metrics": @@ -46,7 +46,7 @@ You can see that one of the imports is our standard metric import from the util .. code-block:: python - AUGUR_HOME/collectoss/routes/util.py + COLLECTOSS_HOME/collectoss/routes/util.py All "Standard Metrics" share declaration and a method signature diff --git a/docs/source/development-guide/create-a-metric/metrics-steps.rst b/docs/source/development-guide/create-a-metric/metrics-steps.rst index 5604c422b..a2fb24a02 100644 --- a/docs/source/development-guide/create-a-metric/metrics-steps.rst +++ b/docs/source/development-guide/create-a-metric/metrics-steps.rst @@ -11,7 +11,7 @@ There are many paths, but we usually follow something along these lines: 2. Sometimes, there are metrics endpoints that integrate, or visualize several metrics. 3. Determine what tables in the CollectOSS Schema contain the data we need to develop this metric 4. 
Construct a very basic query that does the work of joining those tables in a minimal way so we have a "baseline query." -5. Refine the query so that it takes the standard inputs for a "standard metric" if that's what type it is; alternatively, look at non-standard metrics as they are defined in ``AUGUR_HOME/collectoss/routes``, or one of the visualization metrics in ``AUGUR_HOME/collectoss/routes/contributor.py``, ``AUGUR_HOME/collectoss/routes/pull_requests.py`` or ``AUGUR_HOME/collectoss/routes/nonstandard_metrics.py``. (This step is explained in the next section.) +5. Refine the query so that it takes the standard inputs for a "standard metric" if that's what type it is; alternatively, look at non-standard metrics as they are defined in ``COLLECTOSS_HOME/collectoss/routes``, or one of the visualization metrics in ``COLLECTOSS_HOME/collectoss/routes/contributor.py``, ``COLLECTOSS_HOME/collectoss/routes/pull_requests.py`` or ``COLLECTOSS_HOME/collectoss/routes/nonstandard_metrics.py``. (This step is explained in the next section.) Example Query diff --git a/docs/source/development-guide/workers/creating_a_new_worker.rst b/docs/source/development-guide/workers/creating_a_new_worker.rst index 4e713c4ac..a34d73f4b 100644 --- a/docs/source/development-guide/workers/creating_a_new_worker.rst +++ b/docs/source/development-guide/workers/creating_a_new_worker.rst @@ -132,7 +132,7 @@ In the Worker block you need to add something like this: There should NOT be a comma after the final entry in each block. -ALSO, if you wanted to have those blocks installed with auger itself when you do the PR, you need to add them to the `$AUGUR_ROOT/collectoss/config.py` file. The recommended way is to set a port range not already in use and assign a random variable range with the others, like this `your_new_worker_p = randint(56500, 56999)` ... its totally ok to compress a couple other port ranges for this process. 
+ALSO, if you wanted to have those blocks installed with CollectOSS itself when you do the PR, you need to add them to the `$COLLECTOSS_ROOT/collectoss/config.py` file. The recommended way is to set a port range not already in use and assign a random variable range with the others, like this `your_new_worker_p = randint(56500, 56999)` ... it's totally OK to compress a couple other port ranges for this process. You can copy the housekeeper block verbatim from what you added to your own `augur.config.json`. For the worker block, in the `config.py` it would look like this: diff --git a/docs/source/docker/docker-compose.rst b/docs/source/docker/docker-compose.rst index 5c5d16a47..96e8e1c51 100644 --- a/docs/source/docker/docker-compose.rst +++ b/docs/source/docker/docker-compose.rst @@ -27,16 +27,16 @@ This section of the documentation details how to use CollectOSS's Docker Compose .. warning:: Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure all the following environment variables are specified, keep placeholder values if you don't need some of them. - Don't specify AUGUR_DB if you want the docker database to be used. + Don't specify COLLECTOSS_DB if you want the docker database to be used. Example .env: .. 
code:: - AUGUR_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITHUB_USERNAME=usernameGithub - AUGUR_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITLAB_USERNAME=usernameGitlab - AUGUR_DB=yourDBString + COLLECTOSS_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITHUB_USERNAME=usernameGithub + COLLECTOSS_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITLAB_USERNAME=usernameGitlab + COLLECTOSS_DB=yourDBString diff --git a/docs/source/docker/getting-started.rst b/docs/source/docker/getting-started.rst index 0648236a5..db6822b79 100644 --- a/docs/source/docker/getting-started.rst +++ b/docs/source/docker/getting-started.rst @@ -31,14 +31,14 @@ with the following fields (don't remove any variable, keep placeholder values if .. code:: python - AUGUR_DB=collectoss - AUGUR_DB_USER=collectoss - AUGUR_DB_PASSWORD=password_here + COLLECTOSS_DB=collectoss + COLLECTOSS_DB_USER=collectoss + COLLECTOSS_DB_PASSWORD=password_here - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder Then run: @@ -98,11 +98,11 @@ You can provide your own ``.env`` file to pull from. The file should have the be .. code:: - AUGUR_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITHUB_USERNAME=usernameGithub - AUGUR_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx - AUGUR_GITLAB_USERNAME=usernameGitlab - AUGUR_DB=yourDBString + COLLECTOSS_GITHUB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITHUB_USERNAME=usernameGithub + COLLECTOSS_GITLAB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx + COLLECTOSS_GITLAB_USERNAME=usernameGitlab + COLLECTOSS_DB=yourDBString Now that you've created your config file or are ready to generate it yourself, you're ready to `get going `_ . 
diff --git a/docs/source/docker/quick-start.rst b/docs/source/docker/quick-start.rst index 86b552ea3..c71d9dfa2 100644 --- a/docs/source/docker/quick-start.rst +++ b/docs/source/docker/quick-start.rst @@ -13,14 +13,14 @@ Before you get off to such a quick start, go ahead and .. code:: python - AUGUR_DB=collectoss - AUGUR_DB_USER=collectoss - AUGUR_DB_PASSWORD=password_here + COLLECTOSS_DB=collectoss + COLLECTOSS_DB_USER=collectoss + COLLECTOSS_DB_PASSWORD=password_here - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder 5. Build the container using one of the following commands: @@ -57,14 +57,14 @@ And collectoss should be up and running! .. code-block:: - AUGUR_DB=collectoss - AUGUR_DB_USER=collectoss - AUGUR_DB_PASSWORD=password_here + COLLECTOSS_DB=collectoss + COLLECTOSS_DB_USER=collectoss + COLLECTOSS_DB_PASSWORD=password_here - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder 4. Execute the code from the base directory of the CollectOSS repository: diff --git a/docs/source/getting-started/command-line-interface/backend.rst b/docs/source/getting-started/command-line-interface/backend.rst index d53fd36ae..2adcce0ef 100644 --- a/docs/source/getting-started/command-line-interface/backend.rst +++ b/docs/source/getting-started/command-line-interface/backend.rst @@ -145,29 +145,29 @@ Successful output looks like: .. 
code-block:: bash - > CLI: [util.export_env] [INFO] Exporting AUGUR_GITHUB_API_KEY - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_HOST - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_NAME - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_PORT - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_USER - > CLI: [util.export_env] [INFO] Exporting AUGUR_DB_PASSWORD + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_GITHUB_API_KEY + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_HOST + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_NAME + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_PORT + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_USER + > CLI: [util.export_env] [INFO] Exporting COLLECTOSS_DB_PASSWORD # contents of collectoss_export_env.sh #!/bin/bash - export AUGUR_GITHUB_API_KEY="your_key_here" - export AUGUR_DB_HOST="your_host" - export AUGUR_DB_NAME="your_db_name" - export AUGUR_DB_PORT="your_db_port" - export AUGUR_DB_USER="your_db_user" - export AUGUR_DB_PASSWORD="your_db_password" + export COLLECTOSS_GITHUB_API_KEY="your_key_here" + export COLLECTOSS_DB_HOST="your_host" + export COLLECTOSS_DB_NAME="your_db_name" + export COLLECTOSS_DB_PORT="your_db_port" + export COLLECTOSS_DB_USER="your_db_user" + export COLLECTOSS_DB_PASSWORD="your_db_password" # contents of docker_env.txt - AUGUR_GITHUB_API_KEY="your_key_here" - AUGUR_DB_HOST="your_host" - AUGUR_DB_NAME="your_db_name" - AUGUR_DB_PORT="your_db_port" - AUGUR_DB_USER="your_db_user" - AUGUR_DB_PASSWORD="your_db_password" + COLLECTOSS_GITHUB_API_KEY="your_key_here" + COLLECTOSS_DB_HOST="your_host" + COLLECTOSS_DB_NAME="your_db_name" + COLLECTOSS_DB_PORT="your_db_port" + COLLECTOSS_DB_USER="your_db_user" + COLLECTOSS_DB_PASSWORD="your_db_password" ``repo-reset`` diff --git a/docs/source/getting-started/command-line-interface/configure.rst b/docs/source/getting-started/command-line-interface/configure.rst index 5659cf6ec..89350bc1a 100644 --- 
a/docs/source/getting-started/command-line-interface/configure.rst +++ b/docs/source/getting-started/command-line-interface/configure.rst @@ -12,19 +12,19 @@ The ``init`` command is used to create a configuration file, by default named `` Each of the available parameters is optional, and can also be configured using an existing environment variable. Below is the list of available parameters, their defaults, and the corresponding environment variable. ---db_name Database name for your data collection database. Defaults to ``augur``. Set by the ``AUGUR_DB_NAME`` environment variable +--db_name Database name for your data collection database. Defaults to ``augur``. Set by the ``COLLECTOSS_DB_NAME`` environment variable ---db_host Host for your data collection database. Defaults to ``localhost``. Set by the ``AUGUR_DB_HOST`` environment variable +--db_host Host for your data collection database. Defaults to ``localhost``. Set by the ``COLLECTOSS_DB_HOST`` environment variable ---db_user User for your data collection database. Defaults to ``augur``. Set by the ``AUGUR_DB_USER`` environment variable +--db_user User for your data collection database. Defaults to ``augur``. Set by the ``COLLECTOSS_DB_USER`` environment variable ---db_port Port for your data collection database. Defaults to ``5432``. Set by the ``AUGUR_DB_PORT`` environment variable +--db_port Port for your data collection database. Defaults to ``5432``. Set by the ``COLLECTOSS_DB_PORT`` environment variable ---db_password Password for your data collection database. Defaults to ``augur``. Set by the ``AUGUR_DB_PASSWORD`` environment variable +--db_password Password for your data collection database. Defaults to ``augur``. Set by the ``COLLECTOSS_DB_PASSWORD`` environment variable ---github_api_key GitHub API key for data collection from the GitHub API. Defaults to ``key``. Set by the ``AUGUR_GITHUB_API_KEY`` environment variable +--github_api_key GitHub API key for data collection from the GitHub API. 
Defaults to ``key``. Set by the ``COLLECTOSS_GITHUB_API_KEY`` environment variable ---facade_repo_directory The directory on this machine where Facade should store its cloned repos. Defaults to ``repos/``. Set by the ``AUGUR_FACADE_REPO_DIRECTORY`` environment variable +--facade_repo_directory The directory on this machine where Facade should store its cloned repos. Defaults to ``repos/``. Set by the ``COLLECTOSS_FACADE_REPO_DIRECTORY`` environment variable --rc-config-file Path to an existing CollectOSS config file whose values will be used as the defaults. Defaults to ``None``. This parameter does not support being set by an environment variable. @@ -41,7 +41,7 @@ Example usage\: $ uv run collectoss config init --db_name "db_name" --db_host "host" --db_port "port" --db_user "db_user" --db_password "password" --github_api_key "github_api_key" --facade_repo_directory "facade_repo_directory" # to generate an augur.config.json given all credentials and environment variables - $ uv run collectoss config init --db_name $AUGUR_DB_NAME --db_host $AUGUR_DB_HOST --db_port $AUGUR_DB_PORT --db_user $AUGUR_DB_DB_USER --db_password $AUGUR_DB_PASSWORD --github_api_key $AUGUR_GITHUB_API_KEY --facade_repo_directory $AUGUR_FACADE_REPO_DIRECTORY + $ uv run collectoss config init --db_name $COLLECTOSS_DB_NAME --db_host $COLLECTOSS_DB_HOST --db_port $COLLECTOSS_DB_PORT --db_user $COLLECTOSS_DB_USER --db_password $COLLECTOSS_DB_PASSWORD --github_api_key $COLLECTOSS_GITHUB_API_KEY --facade_repo_directory $COLLECTOSS_FACADE_REPO_DIRECTORY # successful output looks like: > CLI: [config.init] [INFO] Config written to /Users/carter/.collectoss/augur.config.json diff --git a/docs/source/getting-started/using-docker.rst b/docs/source/getting-started/using-docker.rst index 5028d5c3a..c427372b1 100644 --- a/docs/source/getting-started/using-docker.rst +++ b/docs/source/getting-started/using-docker.rst @@ -14,14 +14,14 @@ the following resources (or more). .. 
code:: python - AUGUR_DB=augur - AUGUR_DB_USER=augur - AUGUR_DB_PASSWORD=password_here - - AUGUR_GITHUB_API_KEY=ghp_value_here - AUGUR_GITHUB_USERNAME=gh_username - AUGUR_GITLAB_API_KEY=placeholder - AUGUR_GITLAB_USERNAME=placeholder + COLLECTOSS_DB=augur + COLLECTOSS_DB_USER=augur + COLLECTOSS_DB_PASSWORD=password_here + + COLLECTOSS_GITHUB_API_KEY=ghp_value_here + COLLECTOSS_GITHUB_USERNAME=gh_username + COLLECTOSS_GITLAB_API_KEY=placeholder + COLLECTOSS_GITLAB_USERNAME=placeholder 3. Build the container using one of the following commands: diff --git a/environment.txt b/environment.txt index 42d00b9c1..3d4c4a721 100644 --- a/environment.txt +++ b/environment.txt @@ -1,12 +1,12 @@ -AUGUR_DB_HOST=collectoss -AUGUR_DB_NAME=collectoss -AUGUR_DB_USER=collectoss -AUGUR_DB_PASSWORD= +COLLECTOSS_DB_HOST=collectoss +COLLECTOSS_DB_NAME=collectoss +COLLECTOSS_DB_USER=collectoss +COLLECTOSS_DB_PASSWORD= -AUGUR_GITHUB_API_KEY= -AUGUR_GITHUB_USERNAME= -AUGUR_GITLAB_API_KEY= -AUGUR_GITLAB_USERNAME= +COLLECTOSS_GITHUB_API_KEY= +COLLECTOSS_GITHUB_USERNAME= +COLLECTOSS_GITLAB_API_KEY= +COLLECTOSS_GITLAB_USERNAME= -AUGUR_RABBITMQ_USERNAME= -AUGUR_RABBITMQ_PASSWORD= +COLLECTOSS_RABBITMQ_USERNAME= +COLLECTOSS_RABBITMQ_PASSWORD= diff --git a/pyproject.toml b/pyproject.toml index 6a99aacf5..5671e21de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,7 +163,10 @@ markers = [ [tool.mypy] -files = ['collectoss/application/db/*.py'] +files = [ + 'collectoss/application/db/*.py', + 'collectoss/application/environment.py', +] ignore_missing_imports = true follow_imports = "skip" disallow_untyped_defs = false diff --git a/scripts/docker/config.sh b/scripts/docker/config.sh deleted file mode 100755 index 6f92c9a36..000000000 --- a/scripts/docker/config.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash - -PS3=" -Please type the number corresponding to your selection and then press the Enter/Return key. 
-Your choice: " - -target=$1 - -function blank_confirm() { - if [ -z "${1}" ]; then - echo "Bad usage of blank_confirm at:" - caller - return - fi - - confirm_placeholder=${!1} - - while [ -z "${confirm_placeholder}" ]; do - echo "You entered a blank line, are you sure?" - read -p "enter 'yes' to continue, or enter the intended value: " confirm_placeholder - case "$confirm_placeholder" in - [yY][eE][sS] | [yY][eE] | [yY]) - return - ;; - *) - continue - ;; - esac - done - printf -v "$1" "%s" $confirm_placeholder -} - -function get_github_username() { - echo - echo "Please provide your username for Github." - echo "** This is required for CollectOSS to clone Github repos ***" - read -p "GitHub username: " github_username - blank_confirm github_username - echo -} - -function get_github_api_key() { - echo - echo "Please provide a valid GitHub API key." - echo "For more information on how to create the key, visit:" - echo "https://docs.collectoss.org/en/latest/getting-started/collecting-data.html" - echo "** This is required for CollectOSS to gather data ***" - read -p "GitHub API Key: " github_api_key - blank_confirm github_api_key - echo -} - -function get_gitlab_username() { - echo - echo "Please provide your username for GitLab." - echo "** This is required for CollectOSS to clone GitLab repos ***" - read -p "GitLab username: " gitlab_username - blank_confirm gitlab_username - echo -} - -function get_gitlab_api_key() { - echo - echo "Please provide a valid GitLab API key." - echo "For more information on how to create the key, visit:" - echo "https://docs.collectoss.org/en/latest/getting-started/collecting-data.html" - echo "** This is required for CollectOSS to gather data ***" - read -p "GitLab API Key: " gitlab_api_key - blank_confirm gitlab_api_key - echo -} - -function get_facade_repo_path() { - - echo "The Facade data collection worker will clone repositories to this machine to run its analysis." 
- echo "Please select a new or existing directory for the Facade worker to use:" - echo - - while true; do - read -e -p "Facade worker directory: " facade_repo_directory - blank_confirm facade_repo_directory - - facade_repo_directory=$(realpath $facade_repo_directory) - echo - - # if ! [ -w $facade_repo_directory/.git-credentials ]; then - # echo "User $(whoami) does not have permission to write to that location" - # echo "Please select another location" - # continue - # fi - - # Check if the file exists and create it if it doesn't - if [ ! -f "$facade_repo_directory/.git-credentials" ]; then - echo "File .git-credentials does not exist. Creating it..." - touch "$facade_repo_directory/.git-credentials" - fi - - # Check for write permissions - if ! [ -w "$facade_repo_directory/.git-credentials" ]; then - echo "User $(whoami) does not have permission to write to $facade_repo_directory/.git-credentials" - echo "Please select another location" - continue - else - echo "Permission check passed for $facade_repo_directory/.git-credentials" - fi - - if [[ -d "$facade_repo_directory" ]]; then - read -r -p "That directory already exists. Use it? [Y/n]: " facade_response - case "$facade_response" in - [nN][oO] | [nN]) - continue - ;; - *) - break - ;; - esac - else - read -r -p "That directory does not exist. Create it? [Y/n]: " facade_response - case "$facade_response" in - [nN][oO] | [nN]) - continue - ;; - *) - mkdir "$facade_repo_directory" - echo "Directory created." - break - ;; - esac - fi - done - - [[ "${facade_repo_directory}" != */ ]] && facade_repo_directory="${facade_repo_directory}/" -} - -function get_rabbitmq_broker_url() { - echo - echo "Please provide your rabbitmq broker url." - echo "** This is required for CollectOSS to run all collection tasks. 
***" - read -p "broker_url: " rabbitmq_conn_string - blank_confirm rabbitmq_conn_string - echo -} - -function create_config() { - - if [[ -z "${AUGUR_GITHUB_API_KEY}" ]]; then - get_github_api_key - else - echo - echo "Found AUGUR_GITHUB_API_KEY environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITHUB_API_KEY if you would like to be prompted for a github api key" - github_api_key=$AUGUR_GITHUB_API_KEY - echo - fi - - if [[ -z "${AUGUR_GITHUB_USERNAME}" ]]; then - get_github_username - else - echo - echo "Found AUGUR_GITHUB_USERNAME environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITHUB_USERNAME if you would like to be prompted for a github username" - github_username=$AUGUR_GITHUB_USERNAME - echo - fi - - if [[ -z "${AUGUR_GITLAB_API_KEY}" ]]; then - get_gitlab_api_key - else - echo - echo "Found AUGUR_GITLAB_API_KEY environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITLAB_API_KEY if you would like to be prompted for a gitlab api key" - gitlab_api_key=$AUGUR_GITLAB_API_KEY - echo - fi - - if [[ -z "${AUGUR_GITLAB_USERNAME}" ]]; then - get_gitlab_username - else - echo - echo "Found AUGUR_GITLAB_USERNAME environment variable" - echo "Using it in the config" - echo "Please unset AUGUR_GITLAB_USERNAME if you would like to be prompted for a gitlab username" - gitlab_username=$AUGUR_GITLAB_USERNAME - echo - fi - - if [[ -z "${AUGUR_FACADE_REPO_DIRECTORY}" ]]; then - get_facade_repo_path - else - echo - echo "Found AUGUR_FACADE_REPO_DIRECTORY environment variable with value $AUGUR_FACADE_REPO_DIRECTORY" - echo "Using it in the config" - echo "IMPORTANT NOTE: This assumes that this directory already exists" - echo "Please unset AUGUR_FACADE_REPO_DIRECTORY if you would like to be prompted for the facade repo directory" - facade_repo_directory=$AUGUR_FACADE_REPO_DIRECTORY - echo - fi - - if [[ -z "${RABBITMQ_CONN_STRING}" ]]; then - get_rabbitmq_broker_url - else - echo - echo 
"Found RABBITMQ_CONN_STRING environment variable with value $RABBITMQ_CONN_STRING" - echo "Using it in the config" - echo "Please unset RABBITMQ_CONN_STRING if you would like to be prompted for the rabbit MQ connection string" - rabbitmq_conn_string=$RABBITMQ_CONN_STRING - echo - fi - - # echo $rabbitmq_conn_string - # echo $facade_repo_directory - # echo $gitlab_username - # echo $gitlab_api_key - # echo $github_username - # echo $github_api_key - - #special case for docker entrypoint - if [ $target = "docker" ]; then - cmd=( collectoss config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --redis-conn-string $redis_conn_string --rabbitmq-conn-string $rabbitmq_conn_string --logs-directory /logs) - echo "init with redis $redis_conn_string" - else - cmd=( collectoss config init --github-api-key $github_api_key --gitlab-api-key $gitlab_api_key --facade-repo-directory $facade_repo_directory --rabbitmq-conn-string $rabbitmq_conn_string ) - fi - - #Create and cache credentials for github and gitlab - touch $facade_repo_directory/.git-credentials - - echo "https://$github_username:$github_api_key@github.com" > $facade_repo_directory/.git-credentials - echo "https://$gitlab_username:$gitlab_api_key@gitlab.com" >> $facade_repo_directory/.git-credentials - - git config --global credential.helper "store --file $facade_repo_directory/.git-credentials" - "${cmd[@]}" -} -echo -echo "Collecting data for config..." 
-create_config -echo -echo "Config created" -echo - -# config_prompt diff --git a/tests/test_classes/test_environment.py b/tests/test_classes/test_environment.py new file mode 100644 index 000000000..e6621062a --- /dev/null +++ b/tests/test_classes/test_environment.py @@ -0,0 +1,83 @@ +from collectoss.application.environment import SystemEnv, extract_prefix +import logging +import os + +logger = logging.getLogger(__name__) + +prefixes = ["COLLECTOSS", "OTHER"] + +class TestExtractPrefix: + def test_env_extract_prefix(self): + assert extract_prefix("OTHER_DB", prefixes) == "OTHER_" + assert extract_prefix("COLLECTOSS_DB", prefixes) == "COLLECTOSS_" + + def test_env_extract_prefix_default(self): + assert extract_prefix("SOME_DB", prefixes) is None + assert extract_prefix("THINGY_DB", prefixes) is None + + + def test_env_extract_prefix_unprefixed(self): + assert extract_prefix("DB", prefixes) is None + +class TestSystemEnv: + + def test_fetching_env(self): + # plain + os.environ["COLLECTOSS_NAME"] = "A" + assert SystemEnv.get("COLLECTOSS_NAME") == "A" + + # fallback handling + os.environ["OTHER_THING"] = "B" + assert SystemEnv.get("COLLECTOSS_THING", None, prefixes) == "B" + + # cleanup + del os.environ["COLLECTOSS_NAME"] + del os.environ["OTHER_THING"] + + def test_fetching_env_backwards(self): + os.environ["COLLECTOSS_NAME"] = "A" + assert SystemEnv.get("OTHER_NAME", None, prefixes) == "A" + + # cleanup + del os.environ["COLLECTOSS_NAME"] + + def test_fetching_env_no_value(self): + assert SystemEnv.get("COLLECTOSS_MISSING", None, prefixes) is None + + def test_fetching_env_default(self): + assert SystemEnv.get("COLLECTOSS_DEFAULT", "SOME", prefixes) == "SOME" + + def test_no_known_prefix(self): + # fallback handling + os.environ["THING"] = "C" + assert SystemEnv.get("THING", None, prefixes) == "C" + + + def test_get_bool_trues(self): + + cases = ["1", "true", "True", "TRUE", "y", "Y", "yes", "Yes"] + + for case in cases: + os.environ["OTHER_BOOL"] = case + assert 
SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == True, f"value '{case}' should resolve to True" + del os.environ["OTHER_BOOL"] + + def test_get_bool_falses(self): + + cases = ["0", "false", "False", "FALSE", "n", "N", "no", "No"] + + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", True, prefixes) == False, f"value '{case}' should resolve to False" + del os.environ["OTHER_BOOL"] + + def test_get_bool_default(self): + + cases = ["?", "maybe", "Stuff", "333"] + + for case in cases: + os.environ["OTHER_BOOL"] = case + assert SystemEnv.get_bool("OTHER_BOOL", False, prefixes) == False, f"value '{case}' should resolve to Default value" + del os.environ["OTHER_BOOL"] + +