From c12d6feb4db30ecf22078cadbb279e6856be8ce9 Mon Sep 17 00:00:00 2001
From: Joshua Garde <jgarde@jpl.nasa.gov>
Date: Thu, 1 May 2025 13:23:31 -0700
Subject: [PATCH 01/11] Initial terraform module refactor

---
 terraform/main.tf                             | 19 ++++-
 .../combine/confluence-combine_data.tf        | 71 +++++++++++--------
 terraform/modules/combine/main.tf             | 16 -----
 terraform/modules/combine/variables.tf        | 15 ++++
 4 files changed, 75 insertions(+), 46 deletions(-)

diff --git a/terraform/main.tf b/terraform/main.tf
index 6ea4533..f482ad5 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -19,6 +19,18 @@ provider "aws" {
 
 data "aws_caller_identity" "current" {}
 
+data "aws_efs_file_system" "input" {
+  creation_token = "${var.prefix}-input"
+}
+
+data "aws_iam_role" "job" {
+  name = "${var.prefix}-batch-job-role"
+}
+
+data "aws_iam_role" "exec" {
+  name = "${var.prefix}-ecs-exe-task-role"
+}
+
 locals {
   account_id = sensitive(data.aws_caller_identity.current.account_id)
   default_tags = length(var.default_tags) == 0 ? {
@@ -35,4 +47,9 @@ module "confluence-combine-data" {
   aws_region        = var.aws_region
   environment       = var.environment
   prefix            = var.prefix
-}
\ No newline at end of file
+  iam_execution_role_arn = data.aws_iam_role.exec.arn
+  iam_job_role_arn = data.aws_iam_role.job.arn
+  efs_file_system_ids = {
+    input = data.aws_efs_file_system.input.file_system_id
+  }
+}
diff --git a/terraform/modules/combine/confluence-combine_data.tf b/terraform/modules/combine/confluence-combine_data.tf
index fc5712b..e975446 100644
--- a/terraform/modules/combine/confluence-combine_data.tf
+++ b/terraform/modules/combine/confluence-combine_data.tf
@@ -2,40 +2,53 @@
 resource "aws_batch_job_definition" "generate_batch_jd_combine_data" {
   name                  = "${var.prefix}-combine-data"
   type                  = "container"
-  container_properties  = <<CONTAINER_PROPERTIES
-  {
-    "image": "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data",
-    "executionRoleArn": "${data.aws_iam_role.exe_role.arn}",
-    "jobRoleArn": "${data.aws_iam_role.job_role.arn}",
-    "fargatePlatformConfiguration": { "platformVersion": "LATEST" },
-    "logConfiguration": {
-      "logDriver" : "awslogs",
-      "options": {
-        "awslogs-group" : "${data.aws_cloudwatch_log_group.cw_log_group.name}"
+  platform_capabilities = ["FARGATE"]
+  propagate_tags        = true
+  tags                  = { "job_definition": "${var.prefix}-combine-data" }
+
+  container_properties  = jsonencode({
+    image = "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data"
+    executionRoleArn = var.iam_job_role_arn
+    jobRoleArn = var.iam_job_role_arn
+    fargatePlatformConfiguration = {
+      platformVersion = "LATEST"
+    }
+    logConfiguration = {
+      logDriver = "awslogs"
+      options = {
+        awslogs-group = aws_cloudwatch_log_group.cw_log_group.name
+      }
+    }
+    resourceRequirements = [
+      {
+        type = "MEMORY"
+        value = "2048"
+      },
+      {
+        type = "VCPU",
+        value = "1"
       }
-    },
-    "resourceRequirements": [
-      {"type": "MEMORY", "value": "2048"},
-      {"type": "VCPU", "value": "1"}
-    ],
-    "mountPoints": [
+    ]
+    mountPoints = [
       {
-        "sourceVolume": "input",
-        "containerPath": "/data"
+        sourceVolume = "input",
+        containerPath = "/data"
+        readOnly = false
       }
-    ],
-    "volumes": [
+    ]
+    volumes = [
       {
-        "name": "input",
-        "efsVolumeConfiguration": {
-          "fileSystemId": "${data.aws_efs_file_system.aws_efs_input.file_system_id}",
-          "rootDirectory": "/"
+        name = "input"
+        efsVolumeConfiguration = {
+          fileSystemId = var.efs_file_system_ids["input"]
+          rootDirectory = "/"
         }
       }
     ]
-  }
-  CONTAINER_PROPERTIES
-  platform_capabilities = ["FARGATE"]
-  propagate_tags        = true
-  tags = { "job_definition": "${var.prefix}-combine-data" }
+  })
+}
+
+# Log group
+resource "aws_cloudwatch_log_group" "cw_log_group" {
+  name = "/aws/batch/job/${var.prefix}-combine-data/"
 }
diff --git a/terraform/modules/combine/main.tf b/terraform/modules/combine/main.tf
index c090c15..08b9f14 100644
--- a/terraform/modules/combine/main.tf
+++ b/terraform/modules/combine/main.tf
@@ -1,22 +1,6 @@
 # Data sources
 data "aws_caller_identity" "current" {}
 
-data "aws_cloudwatch_log_group" "cw_log_group" {
-  name = "/aws/batch/job/${var.prefix}-combine-data/"
-}
-
-data "aws_efs_file_system" "aws_efs_input" {
-  creation_token = "${var.prefix}-input"
-}
-
-data "aws_iam_role" "job_role" {
-  name = "${var.prefix}-batch-job-role"
-}
-
-data "aws_iam_role" "exe_role" {
-  name = "${var.prefix}-ecs-exe-task-role"
-}
-
 # Local variables
 locals {
   account_id = data.aws_caller_identity.current.account_id
diff --git a/terraform/modules/combine/variables.tf b/terraform/modules/combine/variables.tf
index 30c4a5d..699b768 100644
--- a/terraform/modules/combine/variables.tf
+++ b/terraform/modules/combine/variables.tf
@@ -29,3 +29,18 @@ variable "prefix" {
   type        = string
   description = "Prefix to add to all AWS resources as a unique identifier"
 }
+
+variable "efs_file_system_ids" {
+  type        = map(string)
+  description = "Map of EFS file system ids to pass to the container definition"
+}
+
+variable "iam_job_role_arn" {
+  type        = string
+  description = "The IAM ARN of the job role"
+}
+
+variable "iam_execution_role_arn" {
+  type        = string
+  description = "The IAM ARN of the execution role"
+}

From 44d0ae8ffd119a097f8a7fd6706416320398d693 Mon Sep 17 00:00:00 2001
From: Joshua Garde <jgarde@jpl.nasa.gov>
Date: Thu, 1 May 2025 13:28:22 -0700
Subject: [PATCH 02/11] Update variables

---
 terraform/variables.tf | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/terraform/variables.tf b/terraform/variables.tf
index 30c4a5d..c77d847 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -29,3 +29,13 @@ variable "prefix" {
   type        = string
   description = "Prefix to add to all AWS resources as a unique identifier"
 }
+
+variable "iam_job_role_arn" {
+  type        = string
+  description = "The IAM ARN of the job role"
+}
+
+variable "iam_execution_role_arn" {
+  type        = string
+  description = "The IAM ARN of the execution role"
+}

From 6cc8fc946bbe3e3ad2a0d8d24504d8a4bf37890b Mon Sep 17 00:00:00 2001
From: Joshua Garde <jgarde@jpl.nasa.gov>
Date: Mon, 5 May 2025 11:24:53 -0700
Subject: [PATCH 03/11] Remove unused variables

---
 terraform/variables.tf | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/terraform/variables.tf b/terraform/variables.tf
index c77d847..30c4a5d 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -29,13 +29,3 @@ variable "prefix" {
   type        = string
   description = "Prefix to add to all AWS resources as a unique identifier"
 }
-
-variable "iam_job_role_arn" {
-  type        = string
-  description = "The IAM ARN of the job role"
-}
-
-variable "iam_execution_role_arn" {
-  type        = string
-  description = "The IAM ARN of the execution role"
-}

From 1ede630c001b74802f7082efe0f3049e537a0147 Mon Sep 17 00:00:00 2001
From: Joshua Garde <jgarde@jpl.nasa.gov>
Date: Mon, 19 May 2025 13:09:39 -0700
Subject: [PATCH 04/11] Cleanup: fix execution role ARN, add image_tag variable

---
 terraform/main.tf                             | 18 +++----
 .../combine/confluence-combine_data.tf        | 47 ++++++++-----------
 terraform/modules/combine/main.tf             |  7 +--
 terraform/modules/combine/variables.tf        | 29 +++++++-----
 4 files changed, 47 insertions(+), 54 deletions(-)

diff --git a/terraform/main.tf b/terraform/main.tf
index f482ad5..6efd5f1 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -14,7 +14,7 @@ provider "aws" {
   default_tags {
     tags = local.default_tags
   }
-  region  = var.aws_region
+  region = var.aws_region
 }
 
 data "aws_caller_identity" "current" {}
@@ -41,15 +41,15 @@ locals {
 }
 
 module "confluence-combine-data" {
-  source            = "./modules/combine"
-  app_name          = var.app_name
-  app_version       = var.app_version
-  aws_region        = var.aws_region
-  environment       = var.environment
-  prefix            = var.prefix
-  iam_execution_role_arn = data.aws_iam_role.exec.arn
-  iam_job_role_arn = data.aws_iam_role.job.arn
+  source = "./modules/combine"
+  app_name = var.app_name
+  app_version = var.app_version
+  aws_region = var.aws_region
   efs_file_system_ids = {
     input = data.aws_efs_file_system.input.file_system_id
   }
+  environment = var.environment
+  iam_execution_role_arn = data.aws_iam_role.exec.arn
+  iam_job_role_arn = data.aws_iam_role.job.arn
+  prefix = var.prefix
 }
diff --git a/terraform/modules/combine/confluence-combine_data.tf b/terraform/modules/combine/confluence-combine_data.tf
index e975446..6c25ae7 100644
--- a/terraform/modules/combine/confluence-combine_data.tf
+++ b/terraform/modules/combine/confluence-combine_data.tf
@@ -7,8 +7,8 @@ resource "aws_batch_job_definition" "generate_batch_jd_combine_data" {
   tags                  = { "job_definition": "${var.prefix}-combine-data" }
 
   container_properties  = jsonencode({
-    image = "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data"
-    executionRoleArn = var.iam_job_role_arn
+    image = "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data:${var.image_tag}"
+    executionRoleArn = var.iam_execution_role_arn
     jobRoleArn = var.iam_job_role_arn
     fargatePlatformConfiguration = {
       platformVersion = "LATEST"
@@ -19,32 +19,25 @@ resource "aws_batch_job_definition" "generate_batch_jd_combine_data" {
         awslogs-group = aws_cloudwatch_log_group.cw_log_group.name
       }
     }
-    resourceRequirements = [
-      {
-        type = "MEMORY"
-        value = "2048"
-      },
-      {
-        type = "VCPU",
-        value = "1"
+    resourceRequirements = [{
+      type = "MEMORY"
+      value = "2048"
+    }, {
+      type = "VCPU",
+      value = "1"
+    }]
+    mountPoints = [{
+      sourceVolume = "input",
+      containerPath = "/data"
+      readOnly = false
+    }]
+    volumes = [{
+      name = "input"
+      efsVolumeConfiguration = {
+        fileSystemId = var.efs_file_system_ids["input"]
+        rootDirectory = "/"
       }
-    ]
-    mountPoints = [
-      {
-        sourceVolume = "input",
-        containerPath = "/data"
-        readOnly = false
-      }
-    ]
-    volumes = [
-      {
-        name = "input"
-        efsVolumeConfiguration = {
-          fileSystemId = var.efs_file_system_ids["input"]
-          rootDirectory = "/"
-        }
-      }
-    ]
+    }]
   })
 }
 
diff --git a/terraform/modules/combine/main.tf b/terraform/modules/combine/main.tf
index 08b9f14..b81ede2 100644
--- a/terraform/modules/combine/main.tf
+++ b/terraform/modules/combine/main.tf
@@ -4,9 +4,4 @@ data "aws_caller_identity" "current" {}
 # Local variables
 locals {
   account_id = data.aws_caller_identity.current.account_id
-  default_tags = length(var.default_tags) == 0 ? {
-    application : var.app_name,
-    environment : var.environment,
-    version : var.app_version
-  } : var.default_tags
-}
\ No newline at end of file
+}
diff --git a/terraform/modules/combine/variables.tf b/terraform/modules/combine/variables.tf
index 699b768..20ccceb 100644
--- a/terraform/modules/combine/variables.tf
+++ b/terraform/modules/combine/variables.tf
@@ -8,7 +8,6 @@ variable "app_version" {
   type        = string
   description = "The application version number"
 }
-
 variable "aws_region" {
   type        = string
   description = "AWS region to deploy to"
@@ -16,8 +15,13 @@ variable "aws_region" {
 }
 
 variable "default_tags" {
-  type    = map(string)
-  default = {}
+  type        = map(string)
+  default     = {}
+}
+
+variable "efs_file_system_ids" {
+  type        = map(string)
+  description = "Map of EFS file system ids to pass to the container definition"
 }
 
 variable "environment" {
@@ -25,14 +29,9 @@ variable "environment" {
   description = "The environment in which to deploy to"
 }
 
-variable "prefix" {
+variable "iam_execution_role_arn" {
   type        = string
-  description = "Prefix to add to all AWS resources as a unique identifier"
-}
-
-variable "efs_file_system_ids" {
-  type        = map(string)
-  description = "Map of EFS file system ids to pass to the container definition"
+  description = "The IAM ARN of the execution role"
 }
 
 variable "iam_job_role_arn" {
@@ -40,7 +39,13 @@ variable "iam_job_role_arn" {
   description = "The IAM ARN of the job role"
 }
 
-variable "iam_execution_role_arn" {
+variable "image_tag" {
   type        = string
-  description = "The IAM ARN of the execution role"
+  description = "The container image tag to utilize"
+  default     = "latest"
+}
+
+variable "prefix" {
+  type        = string
+  description = "Prefix to add to all AWS resources as a unique identifier"
 }

From 5ed6e87aa7c4ad72845b560e8319f80866eaf0c0 Mon Sep 17 00:00:00 2001
From: Joshua Garde <jgarde@jpl.nasa.gov>
Date: Mon, 19 May 2025 13:22:21 -0700
Subject: [PATCH 05/11] Lint

---
 terraform/modules/combine/confluence-combine_data.tf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/terraform/modules/combine/confluence-combine_data.tf b/terraform/modules/combine/confluence-combine_data.tf
index 6c25ae7..7bd5a7d 100644
--- a/terraform/modules/combine/confluence-combine_data.tf
+++ b/terraform/modules/combine/confluence-combine_data.tf
@@ -1,10 +1,10 @@
 # Job Definition
 resource "aws_batch_job_definition" "generate_batch_jd_combine_data" {
-  name                  = "${var.prefix}-combine-data"
-  type                  = "container"
+  name = "${var.prefix}-combine-data"
+  type = "container"
   platform_capabilities = ["FARGATE"]
-  propagate_tags        = true
-  tags                  = { "job_definition": "${var.prefix}-combine-data" }
+  propagate_tags = true
+  tags = { "job_definition": "${var.prefix}-combine-data" }
 
   container_properties  = jsonencode({
     image = "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data:${var.image_tag}"

From 93e69d7db224c6a3d5ec72f2f7c22bf42847e438 Mon Sep 17 00:00:00 2001
From: Joshua Garde <jgarde@jpl.nasa.gov>
Date: Mon, 19 May 2025 16:57:16 -0700
Subject: [PATCH 06/11] default_tags padding

---
 terraform/variables.tf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/terraform/variables.tf b/terraform/variables.tf
index 30c4a5d..c368ffb 100644
--- a/terraform/variables.tf
+++ b/terraform/variables.tf
@@ -16,8 +16,8 @@ variable "aws_region" {
 }
 
 variable "default_tags" {
-  type    = map(string)
-  default = {}
+  type        = map(string)
+  default     = {}
 }
 
 variable "environment" {

From a2e380ece652a537d3007dda14371874ff10aad5 Mon Sep 17 00:00:00 2001
From: Travis-Simmons <travisthomassimmons@gmail.com>
Date: Thu, 8 May 2025 19:16:08 +0000
Subject: [PATCH 07/11] Add SSC combine step, enabled with the --ssc flag

---
 combine_data.py | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/combine_data.py b/combine_data.py
index e980ce8..495e1be 100644
--- a/combine_data.py
+++ b/combine_data.py
@@ -82,6 +82,9 @@ def create_args():
                             "--expanded",
                             help="Indicate we are looking for expanded set files.",
                             action="store_true")
+    arg_parser.add_argument("--ssc",
+                            help="Indicate we are looking for expanded set files.",
+                            action="store_true")
     return arg_parser
 
 def get_logger():
@@ -106,7 +109,7 @@ def get_logger():
     # Return logger
     return logger
 
-def combine_continents(continents, data_dir, sword_version,expanded, logger):
+def combine_continents(continents, data_dir, sword_version,expanded,ssc, logger):
     """Combine continent-level data in to global data.
 
     Parameters
@@ -176,8 +179,44 @@ def combine_continents(continents, data_dir, sword_version,expanded, logger):
             json.dump(continent_json, jf, indent=2)
             logger.info(f"Written: {c_file}")
 
+    if ssc:
+        ssc_json_data = combine_ssc(data_dir=data_dir, logger = logger)
+
+        with open(os.path.join(data_dir,"ssc_hls_list.json"), "w") as jf:
+            json.dump(ssc_json_data, jf, indent=2)
+
     return reaches_json_list
 
+def combine_ssc(data_dir:str, logger):
+        """Combine SSC input data into a single file."""
+        ssc_input_data = glob.glob(os.path.join(data_dir, "ssc", "*.json"))
+        logger.info('found', len(ssc_input_data), 'ssc files...')
+
+
+        ssc_json_data = {}
+        count = 0
+        for ssc_input in ssc_input_data:
+            logger.info('processing ssc')
+            with open(ssc_input) as jf:
+                data = json.load(jf)
+                logger.info(f'{ssc_input}')
+                for key in list(data.keys()):
+                    short_key = key[:-10]
+                    if short_key in list(ssc_json_data.keys()):
+                        prev_len = len(ssc_json_data[short_key])
+                        ssc_json_data[short_key].extend(data[key])
+                        ssc_json_data[short_key] = list(set(ssc_json_data[short_key]))
+                        after_len = len(ssc_json_data[short_key])
+                        if prev_len != after_len:
+                            logger.info(f'{ssc_input} difference {short_key}')
+
+                    else:
+                        ssc_json_data[short_key] = data[key]
+
+
+                # ssc_json_data.extend(data)
+        return ssc_json_data
+
 def create_basin_data(data_dir, basin_id, base_reaches, sword_version):
     continent_codes = { '1': "af", '2': "eu", '3': "as", '4': "as", '5': "oc", '6': "sa", '7': "na", '8': "na", '9':"na" }
 
@@ -260,7 +299,7 @@ def combine_data():
     ]
 
     # Combine continent-level data
-    json_file_list = combine_continents(continents, args.datadir, args.sword_version, args.expanded, logger)
+    json_file_list = combine_continents(continents, args.datadir, args.sword_version, args.expanded, args.ssc, logger)
 
     # Upload JSON files to S3
     if args.uploadbucket:

From eb232bcb05cd19d8531722266ffaa3e1e4386139 Mon Sep 17 00:00:00 2001
From: Travis-Simmons <travisthomassimmons@gmail.com>
Date: Thu, 8 May 2025 19:24:17 +0000
Subject: [PATCH 08/11] Return combined SSC data as a list of single-entry dicts

---
 combine_data.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/combine_data.py b/combine_data.py
index 495e1be..3e26f6c 100644
--- a/combine_data.py
+++ b/combine_data.py
@@ -177,8 +177,7 @@ def combine_continents(continents, data_dir, sword_version,expanded,ssc, logger)
         reaches_json_list.append(c_file)
         with open(c_file, 'w') as jf:
             json.dump(continent_json, jf, indent=2)
-            logger.info(f"Written: {c_file}")
-
+            logger.info(f"Written: {c_file}")         
     if ssc:
         ssc_json_data = combine_ssc(data_dir=data_dir, logger = logger)
 
@@ -215,7 +214,9 @@ def combine_ssc(data_dir:str, logger):
 
 
                 # ssc_json_data.extend(data)
-        return ssc_json_data
+        single_entry_list = [{k: v} for k, v in ssc_json_data.items()]
+
+        return single_entry_list
 
 def create_basin_data(data_dir, basin_id, base_reaches, sword_version):
     continent_codes = { '1': "af", '2': "eu", '3': "as", '4': "as", '5': "oc", '6': "sa", '7': "na", '8': "na", '9':"na" }

From 107ace456a77ac015ba599bdebeb312400328944 Mon Sep 17 00:00:00 2001
From: Travis-Simmons <travisthomassimmons@gmail.com>
Date: Thu, 8 May 2025 19:34:06 +0000
Subject: [PATCH 09/11] Remove logger.info calls from combine_ssc

---
 combine_data.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/combine_data.py b/combine_data.py
index 3e26f6c..6504cbb 100644
--- a/combine_data.py
+++ b/combine_data.py
@@ -189,16 +189,13 @@ def combine_continents(continents, data_dir, sword_version,expanded,ssc, logger)
 def combine_ssc(data_dir:str, logger):
         """Combine SSC input data into a single file."""
         ssc_input_data = glob.glob(os.path.join(data_dir, "ssc", "*.json"))
-        logger.info('found', len(ssc_input_data), 'ssc files...')
 
 
         ssc_json_data = {}
         count = 0
         for ssc_input in ssc_input_data:
-            logger.info('processing ssc')
             with open(ssc_input) as jf:
                 data = json.load(jf)
-                logger.info(f'{ssc_input}')
                 for key in list(data.keys()):
                     short_key = key[:-10]
                     if short_key in list(ssc_json_data.keys()):
@@ -206,8 +203,6 @@ def combine_ssc(data_dir:str, logger):
                         ssc_json_data[short_key].extend(data[key])
                         ssc_json_data[short_key] = list(set(ssc_json_data[short_key]))
                         after_len = len(ssc_json_data[short_key])
-                        if prev_len != after_len:
-                            logger.info(f'{ssc_input} difference {short_key}')
 
                     else:
                         ssc_json_data[short_key] = data[key]

From ac8cb0ca0be3ad51722996c9d94e672631dda071 Mon Sep 17 00:00:00 2001
From: Nikki <17799906+nikki-t@users.noreply.github.com>
Date: Thu, 8 May 2025 16:11:27 -0400
Subject: [PATCH 10/11] Make sure SSC HLS JSON is uploaded to S3

---
 combine_data.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/combine_data.py b/combine_data.py
index 6504cbb..d4eb226 100644
--- a/combine_data.py
+++ b/combine_data.py
@@ -181,8 +181,11 @@ def combine_continents(continents, data_dir, sword_version,expanded,ssc, logger)
     if ssc:
         ssc_json_data = combine_ssc(data_dir=data_dir, logger = logger)
 
-        with open(os.path.join(data_dir,"ssc_hls_list.json"), "w") as jf:
+        ssc_json = os.path.join(data_dir,"ssc_hls_list.json")
+        with open(ssc_json, "w") as jf:
             json.dump(ssc_json_data, jf, indent=2)
+        reaches_json_list.append(ssc_json)
+        logger.info(f"Written: %s", ssc_json)
 
     return reaches_json_list
 

From fde75f305e500b375d218b84fc4d91f9ecff2c89 Mon Sep 17 00:00:00 2001
From: Nikki Tebaldi <17799906+nikki-t@users.noreply.github.com>
Date: Thu, 15 May 2025 16:30:56 -0400
Subject: [PATCH 11/11] Integrate SSC and lakeflow without any extra command
 line args (#11)

---
 combine_data.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/combine_data.py b/combine_data.py
index d4eb226..ce15e2f 100644
--- a/combine_data.py
+++ b/combine_data.py
@@ -82,9 +82,6 @@ def create_args():
                             "--expanded",
                             help="Indicate we are looking for expanded set files.",
                             action="store_true")
-    arg_parser.add_argument("--ssc",
-                            help="Indicate we are looking for expanded set files.",
-                            action="store_true")
     return arg_parser
 
 def get_logger():
@@ -109,7 +106,7 @@ def get_logger():
     # Return logger
     return logger
 
-def combine_continents(continents, data_dir, sword_version,expanded,ssc, logger):
+def combine_continents(continents, data_dir, sword_version, expanded, logger):
     """Combine continent-level data in to global data.
 
     Parameters
@@ -171,21 +168,23 @@ def combine_continents(continents, data_dir, sword_version,expanded,ssc, logger)
         with open(outpath, 'w') as jf:
             json.dump(out_dict[a_key], jf, indent=2)
             logger.info(f"Written: {outpath}.")
-    
+
     if not expanded:
         c_file = os.path.join(data_dir, 'continent.json')
         reaches_json_list.append(c_file)
         with open(c_file, 'w') as jf:
             json.dump(continent_json, jf, indent=2)
             logger.info(f"Written: {c_file}")         
-    if ssc:
-        ssc_json_data = combine_ssc(data_dir=data_dir, logger = logger)
 
+    ssc_json_data = combine_ssc(data_dir=data_dir, logger = logger)
+    if len(ssc_json_data) > 0:
         ssc_json = os.path.join(data_dir,"ssc_hls_list.json")
         with open(ssc_json, "w") as jf:
             json.dump(ssc_json_data, jf, indent=2)
         reaches_json_list.append(ssc_json)
         logger.info(f"Written: %s", ssc_json)
+    else:
+        logger.info("No SSC JSON written.")
 
     return reaches_json_list
 
@@ -193,7 +192,6 @@ def combine_ssc(data_dir:str, logger):
         """Combine SSC input data into a single file."""
         ssc_input_data = glob.glob(os.path.join(data_dir, "ssc", "*.json"))
 
-
         ssc_json_data = {}
         count = 0
         for ssc_input in ssc_input_data:
@@ -210,7 +208,6 @@ def combine_ssc(data_dir:str, logger):
                     else:
                         ssc_json_data[short_key] = data[key]
 
-
                 # ssc_json_data.extend(data)
         single_entry_list = [{k: v} for k, v in ssc_json_data.items()]
 
@@ -298,7 +295,11 @@ def combine_data():
     ]
 
     # Combine continent-level data
-    json_file_list = combine_continents(continents, args.datadir, args.sword_version, args.expanded, args.ssc, logger)
+    json_file_list = combine_continents(continents, args.datadir, args.sword_version, args.expanded, logger)
+
+    # Check for lakeflow data
+    viable_lakes = pathlib.Path(args.datadir).joinpath("lakeflow", "viable", "viable_locations.csv")
+    if viable_lakes.exists(): json_file_list.append(str(viable_lakes))
 
     # Upload JSON files to S3
     if args.uploadbucket: