def combine_ssc(data_dir: str, logger) -> list:
    """Combine SSC input data into a single list of one-key dictionaries.

    Every ``*.json`` file found in ``<data_dir>/ssc`` is loaded and its
    top-level keys are merged. Keys are shortened by dropping their last
    ten characters; values belonging to the same shortened key are
    concatenated with duplicates removed.

    Parameters
    ----------
    data_dir: str
        Directory that contains the ``ssc`` sub-directory of JSON inputs.
    logger: logging.Logger
        Logger used to report how much data was combined. ``None`` is
        tolerated and simply disables the message.

    Returns
    -------
    list
        One ``{shortened_key: [values, ...]}`` dictionary per shortened key,
        in first-seen order. Empty when no SSC input files exist.

    Raises
    ------
    json.JSONDecodeError
        If an input file is not valid JSON.
    TypeError
        If a value to be merged is not hashable.
    """

    # Number of trailing characters removed from every input key so that
    # related keys collapse onto a single entry.
    # NOTE(review): presumably a fixed-width per-file suffix -- confirm
    # against whatever writes the SSC input files.
    suffix_len = 10

    # glob returns paths in arbitrary order; sorting keeps the merged
    # output identical from run to run.
    ssc_files = sorted(glob.glob(os.path.join(data_dir, "ssc", "*.json")))

    # shortened key -> dict used as an insertion-ordered set of values.
    # Unlike list(set(...)), this keeps a stable, first-seen ordering and
    # removes duplicates even when a key appears in only one file.
    merged = {}
    for ssc_file in ssc_files:
        with open(ssc_file, encoding="utf-8") as jf:
            data = json.load(jf)
        for key, values in data.items():
            unique_values = merged.setdefault(key[:-suffix_len], {})
            unique_values.update(dict.fromkeys(values))

    if logger is not None:
        logger.info(
            "Combined %d SSC file(s) into %d entries.",
            len(ssc_files),
            len(merged),
        )

    return [
        {short_key: list(unique_values)}
        for short_key, unique_values in merged.items()
    ]
+ def create_basin_data(data_dir, basin_id, base_reaches, sword_version): continent_codes = { '1': "af", '2': "eu", '3': "as", '4': "as", '5': "oc", '6': "sa", '7': "na", '8': "na", '9':"na" } @@ -262,6 +297,10 @@ def combine_data(): # Combine continent-level data json_file_list = combine_continents(continents, args.datadir, args.sword_version, args.expanded, logger) + # Check for lakeflow data + viable_lakes = pathlib.Path(args.datadir).joinpath("lakeflow", "viable", "viable_locations.csv") + if viable_lakes.exists(): json_file_list.append(str(viable_lakes)) + # Upload JSON files to S3 if args.uploadbucket: try: diff --git a/terraform/main.tf b/terraform/main.tf index 6ea4533..6efd5f1 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -14,11 +14,23 @@ provider "aws" { default_tags { tags = local.default_tags } - region = var.aws_region + region = var.aws_region } data "aws_caller_identity" "current" {} +data "aws_efs_file_system" "input" { + creation_token = "${var.prefix}-input" +} + +data "aws_iam_role" "job" { + name = "${var.prefix}-batch-job-role" +} + +data "aws_iam_role" "exec" { + name = "${var.prefix}-ecs-exe-task-role" +} + locals { account_id = sensitive(data.aws_caller_identity.current.account_id) default_tags = length(var.default_tags) == 0 ? 
{ @@ -29,10 +41,15 @@ locals { } module "confluence-combine-data" { - source = "./modules/combine" - app_name = var.app_name - app_version = var.app_version - aws_region = var.aws_region - environment = var.environment - prefix = var.prefix -} \ No newline at end of file + source = "./modules/combine" + app_name = var.app_name + app_version = var.app_version + aws_region = var.aws_region + efs_file_system_ids = { + input = data.aws_efs_file_system.input.file_system_id + } + environment = var.environment + iam_execution_role_arn = data.aws_iam_role.exec.arn + iam_job_role_arn = data.aws_iam_role.job.arn + prefix = var.prefix +} diff --git a/terraform/modules/combine/confluence-combine_data.tf b/terraform/modules/combine/confluence-combine_data.tf index fc5712b..7bd5a7d 100644 --- a/terraform/modules/combine/confluence-combine_data.tf +++ b/terraform/modules/combine/confluence-combine_data.tf @@ -1,41 +1,47 @@ # Job Definition resource "aws_batch_job_definition" "generate_batch_jd_combine_data" { - name = "${var.prefix}-combine-data" - type = "container" - container_properties = <