Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions combine_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def get_logger():
# Return logger
return logger

def combine_continents(continents, data_dir, sword_version,expanded, logger):
def combine_continents(continents, data_dir, sword_version, expanded, logger):
"""Combine continent-level data in to global data.

Parameters
Expand Down Expand Up @@ -168,16 +168,51 @@ def combine_continents(continents, data_dir, sword_version,expanded, logger):
with open(outpath, 'w') as jf:
json.dump(out_dict[a_key], jf, indent=2)
logger.info(f"Written: {outpath}.")

if not expanded:
c_file = os.path.join(data_dir, 'continent.json')
reaches_json_list.append(c_file)
with open(c_file, 'w') as jf:
json.dump(continent_json, jf, indent=2)
logger.info(f"Written: {c_file}")
logger.info(f"Written: {c_file}")

ssc_json_data = combine_ssc(data_dir=data_dir, logger = logger)
if len(ssc_json_data) > 0:
ssc_json = os.path.join(data_dir,"ssc_hls_list.json")
with open(ssc_json, "w") as jf:
json.dump(ssc_json_data, jf, indent=2)
reaches_json_list.append(ssc_json)
logger.info(f"Written: %s", ssc_json)
else:
logger.info("No SSC JSON written.")

return reaches_json_list

def combine_ssc(data_dir: str, logger):
    """Combine the per-file SSC JSON inputs into one list of entries.

    Every ``*.json`` file found in ``<data_dir>/ssc`` is loaded as a mapping.
    Each key has its last 10 characters stripped, and the values of all keys
    that share the same shortened key are merged into one de-duplicated list.

    Parameters
    ----------
    data_dir: str
        Directory that contains the ``ssc`` sub-directory of JSON files.
    logger
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    list
        One single-entry dict ``{short_key: values}`` per shortened key.
        Empty when no input files are found.

    Notes
    -----
    Values are assumed to be lists of hashable items (e.g. strings) --
    TODO confirm against the producer of the SSC files.
    """
    # glob returns files in arbitrary order; sort so the output is reproducible.
    ssc_files = sorted(glob.glob(os.path.join(data_dir, "ssc", "*.json")))

    merged = {}
    for ssc_file in ssc_files:
        with open(ssc_file) as jf:
            data = json.load(jf)
        for key, values in data.items():
            # Drop the trailing 10 characters of the key so related keys
            # collapse onto the same entry.
            short_key = key[:-10]
            # A dict used as an ordered set: removes duplicates while keeping
            # first-seen order, unlike list(set(...)) whose order is arbitrary.
            merged.setdefault(short_key, {}).update(dict.fromkeys(values))

    return [{short_key: list(values)} for short_key, values in merged.items()]

def create_basin_data(data_dir, basin_id, base_reaches, sword_version):
continent_codes = { '1': "af", '2': "eu", '3': "as", '4': "as", '5': "oc", '6': "sa", '7': "na", '8': "na", '9':"na" }

Expand Down Expand Up @@ -262,6 +297,10 @@ def combine_data():
# Combine continent-level data
json_file_list = combine_continents(continents, args.datadir, args.sword_version, args.expanded, logger)

# Check for lakeflow data
viable_lakes = pathlib.Path(args.datadir).joinpath("lakeflow", "viable", "viable_locations.csv")
if viable_lakes.exists(): json_file_list.append(str(viable_lakes))

# Upload JSON files to S3
if args.uploadbucket:
try:
Expand Down
33 changes: 25 additions & 8 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,23 @@ provider "aws" {
default_tags {
tags = local.default_tags
}
region = var.aws_region
region = var.aws_region
}

data "aws_caller_identity" "current" {}

# Look up the existing EFS file system whose creation token is "<prefix>-input".
data "aws_efs_file_system" "input" {
creation_token = "${var.prefix}-input"
}

# Look up the existing IAM role named "<prefix>-batch-job-role".
data "aws_iam_role" "job" {
name = "${var.prefix}-batch-job-role"
}

# Look up the existing IAM role named "<prefix>-ecs-exe-task-role".
data "aws_iam_role" "exec" {
name = "${var.prefix}-ecs-exe-task-role"
}

locals {
account_id = sensitive(data.aws_caller_identity.current.account_id)
default_tags = length(var.default_tags) == 0 ? {
Expand All @@ -29,10 +41,15 @@ locals {
}

module "confluence-combine-data" {
source = "./modules/combine"
app_name = var.app_name
app_version = var.app_version
aws_region = var.aws_region
environment = var.environment
prefix = var.prefix
}
source = "./modules/combine"
app_name = var.app_name
app_version = var.app_version
aws_region = var.aws_region
efs_file_system_ids = {
input = data.aws_efs_file_system.input.file_system_id
}
environment = var.environment
iam_execution_role_arn = data.aws_iam_role.exec.arn
iam_job_role_arn = data.aws_iam_role.job.arn
prefix = var.prefix
}
78 changes: 42 additions & 36 deletions terraform/modules/combine/confluence-combine_data.tf
Original file line number Diff line number Diff line change
@@ -1,41 +1,47 @@
# Job Definition
resource "aws_batch_job_definition" "generate_batch_jd_combine_data" {
name = "${var.prefix}-combine-data"
type = "container"
container_properties = <<CONTAINER_PROPERTIES
{
"image": "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data",
"executionRoleArn": "${data.aws_iam_role.exe_role.arn}",
"jobRoleArn": "${data.aws_iam_role.job_role.arn}",
"fargatePlatformConfiguration": { "platformVersion": "LATEST" },
"logConfiguration": {
"logDriver" : "awslogs",
"options": {
"awslogs-group" : "${data.aws_cloudwatch_log_group.cw_log_group.name}"
}
},
"resourceRequirements": [
{"type": "MEMORY", "value": "2048"},
{"type": "VCPU", "value": "1"}
],
"mountPoints": [
{
"sourceVolume": "input",
"containerPath": "/data"
}
],
"volumes": [
{
"name": "input",
"efsVolumeConfiguration": {
"fileSystemId": "${data.aws_efs_file_system.aws_efs_input.file_system_id}",
"rootDirectory": "/"
}
}
]
}
CONTAINER_PROPERTIES
name = "${var.prefix}-combine-data"
type = "container"
platform_capabilities = ["FARGATE"]
propagate_tags = true
propagate_tags = true
tags = { "job_definition": "${var.prefix}-combine-data" }

container_properties = jsonencode({
image = "${local.account_id}.dkr.ecr.us-west-2.amazonaws.com/${var.prefix}-combine-data:${var.image_tag}"
executionRoleArn = var.iam_execution_role_arn
jobRoleArn = var.iam_job_role_arn
fargatePlatformConfiguration = {
platformVersion = "LATEST"
}
logConfiguration = {
logDriver = "awslogs"
options = {
awslogs-group = aws_cloudwatch_log_group.cw_log_group.name
}
}
resourceRequirements = [{
type = "MEMORY"
value = "2048"
}, {
type = "VCPU",
value = "1"
}]
mountPoints = [{
sourceVolume = "input",
containerPath = "/data"
readOnly = false
}]
volumes = [{
name = "input"
efsVolumeConfiguration = {
fileSystemId = var.efs_file_system_ids["input"]
rootDirectory = "/"
}
}]
})
}

# Log group
# CloudWatch log group named "/aws/batch/job/<prefix>-combine-data/".
resource "aws_cloudwatch_log_group" "cw_log_group" {
name = "/aws/batch/job/${var.prefix}-combine-data/"
}
23 changes: 1 addition & 22 deletions terraform/modules/combine/main.tf
Original file line number Diff line number Diff line change
@@ -1,28 +1,7 @@
# Data sources
data "aws_caller_identity" "current" {}

data "aws_cloudwatch_log_group" "cw_log_group" {
name = "/aws/batch/job/${var.prefix}-combine-data/"
}

data "aws_efs_file_system" "aws_efs_input" {
creation_token = "${var.prefix}-input"
}

data "aws_iam_role" "job_role" {
name = "${var.prefix}-batch-job-role"
}

data "aws_iam_role" "exe_role" {
name = "${var.prefix}-ecs-exe-task-role"
}

# Local variables
locals {
account_id = data.aws_caller_identity.current.account_id
default_tags = length(var.default_tags) == 0 ? {
application : var.app_name,
environment : var.environment,
version : var.app_version
} : var.default_tags
}
}
26 changes: 23 additions & 3 deletions terraform/modules/combine/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,43 @@ variable "app_version" {
type = string
description = "The application version number"
}

variable "aws_region" {
type = string
description = "AWS region to deploy to"
default = "us-west-2"
}

variable "default_tags" {
type = map(string)
default = {}
type = map(string)
default = {}
}

# Required input (no default). The job definition reads the "input" key of this
# map as the file system id of its EFS volume.
variable "efs_file_system_ids" {
type = map(string)
description = "Map of EFS file system ids to pass to the container definition"
}

variable "environment" {
type = string
description = "The environment in which to deploy to"
}

# Required input (no default). Used as the job definition's executionRoleArn.
variable "iam_execution_role_arn" {
type = string
description = "The IAM ARN of the execution role"
}

# Required input (no default). Used as the job definition's jobRoleArn.
variable "iam_job_role_arn" {
type = string
description = "The IAM ARN of the job role"
}

# Optional input. Appended after ":" to the ECR image URI in the job definition;
# falls back to "latest" when not set.
variable "image_tag" {
type = string
description = "The container image tag to utilize"
default = "latest"
}

variable "prefix" {
type = string
description = "Prefix to add to all AWS resources as a unique identifier"
Expand Down
4 changes: 2 additions & 2 deletions terraform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ variable "aws_region" {
}

variable "default_tags" {
type = map(string)
default = {}
type = map(string)
default = {}
}

variable "environment" {
Expand Down