From 16574e7a46ff47203fb3d88d379d5564c56534c7 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Sun, 5 Apr 2026 07:07:54 -0400 Subject: [PATCH 01/12] Updated Julia version --- Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9cb4d02..1bd7e40 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Stage 0 - Create from julia image and install OS packages -FROM julia:1.11.6 as stage0 +FROM julia:1.12.5 as stage0 RUN apt update && apt -y install bzip2 build-essential libxml2 # STAGE 1 - Python and python packages for S3 functionality @@ -15,8 +15,8 @@ ENV PYTHON="/usr/bin/python3" COPY deps.jl /app/deps.jl ENV JULIA_CPU_TARGET="generic;sandybridge,-xsaveopt,clone_all;haswell,-rdrnd,base(1)" RUN julia /app/deps.jl \ - && find /usr/local/bin/julia_pkgs -type d -exec chmod 755 {} \; \ - && find /usr/local/bin/julia_pkgs -type f -exec chmod 644 {} \; + && find /usr/local/bin/julia_pkgs -type d -exec chmod 755 {} \; \ + && find /usr/local/bin/julia_pkgs -type f -exec chmod 644 {} \; # Stage 3 - Copy SWOT script FROM stage2 as stage3 @@ -26,7 +26,7 @@ COPY ./sos_read /app/sos_read/ # Stage 4 - Execute algorithm FROM stage3 as stage4 LABEL version="1.0" \ - description="Containerized SAD algorithm." \ - "confluence.contact"="ntebaldi@umass.edu" \ - "algorithm.contact"="kandread@umass.edu" + description="Containerized SAD algorithm." \ + "confluence.contact"="ntebaldi@umass.edu" \ + "algorithm.contact"="kandread@umass.edu" ENTRYPOINT ["/usr/local/julia/bin/julia", "/app/swot.jl"] From 43570f476bb94bd3d2de77cb06ba27fd8fa1f383 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Sun, 5 Apr 2026 07:08:15 -0400 Subject: [PATCH 02/12] Make script compatible with current Sad.jl version --- Sad.jl | 2 +- swot.jl | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Sad.jl b/Sad.jl index a1f580e..b47aa19 160000 --- a/Sad.jl +++ b/Sad.jl @@ -1 +1 @@ -Subproject commit a1f580e9e79054b06a9c5e564a7ef9c147405cf9 +Subproject commit b47aa195af327bd0a3fd555a9ab42790a3769efd diff --git a/swot.jl b/swot.jl index b544bbb..885845c 100644 --- a/swot.jl +++ b/swot.jl @@ -171,7 +171,7 @@ function main() else index = parsed_args["index"] + 1 end - + reachfile = parsed_args["reachfile"] bucketkey = parsed_args["bucketkey"] println("Index: $(index)") @@ -188,7 +188,7 @@ function main() H, W, S, dA, Hr, Wr, Sr, time_str = read_swot_obs(swotfile, nids) try - x, H, W, S = Sad.drop_unobserved(x, H, W, S) + reach = Sad.preprocess(x, H, W, S) catch e if e isa MethodError println("Error loading swot observation") @@ -202,16 +202,17 @@ function main() println("$(reachid): INVALID") write_output(reachid, 0, outdir, A0, n, Qa, Qu, W, time_str) else - Hmin = minimum(skipmissing(H[1, :])) - Qp, np, rp, zp = Sad.priors(sosfile, Hmin, reachid) - if ismissing(Qp) + p = Sad.priors(sosfile, reach.hmin, reachid) + if ismissing(p.Qp) println("$(reachid): INVALID, missing mean discharge") write_output(reachid, 0, outdir, A0, n, Qa, Qu, W, time_str) else try - nens = 100 # default ensemble size - nsamples = 1000 # default sampling size - Qa, Qu, A0, n = Sad.estimate(x, H, W, S, dA, Qp, np, rp, zp, nens, nsamples, Hr, Wr, Sr) + res = Sad.infer(p, reach) + A0 = Sad.compute_A0(reach, res.reach_ensemble) + n = mean(res.reach_ensemble[1, :]) + Qa[1, :] = res.Q_post + Qu[1, :] = [isnothing(res.A_post[t]) ? NaN : std(exp.(res.A_post[t][1,:])) for t in 1:reach.nt] println("$(reachid): VALID") write_output(reachid, 1, outdir, A0, n, Qa, Qu, W, time_str) catch From 32715915d0e7bc05feef7bd3a4b24dae7b2226c6 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Mon, 6 Apr 2026 06:17:21 -0400 Subject: [PATCH 03/12] Use correct NCDatasets API --- swot.jl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/swot.jl b/swot.jl index 885845c..599add4 100644 --- a/swot.jl +++ b/swot.jl @@ -48,11 +48,11 @@ Load SWOT observations. """ function read_swot_obs(ncfile::String, nids::Vector{Int}) Dataset(ncfile) do ds - nodes = NCDatasets.group(ds, "node") - reaches = NCDatasets.group(ds, "reach") - S = permutedims(nodes["slope2"][:]) - H = permutedims(nodes["wse"][:]) - W = permutedims(nodes["width"][:]) + nodes = ds.group["node"] + reaches = ds.group["reach"] + S = permutedims(nodes["slope2"][:, :]) + H = permutedims(nodes["wse"][:, :]) + W = permutedims(nodes["width"][:, :]) dA = reaches["d_x_area"][:] dA = convert(Vector{Sad.FloatM}, dA) Hr = convert(Vector{Sad.FloatM}, reaches["wse"][:]) @@ -64,10 +64,8 @@ function read_swot_obs(ncfile::String, nids::Vector{Int}) nid = nodes["node_id"][:] dmap = Dict(nid[k] => k for k=1:length(nid)) i = [dmap[k] for k in nids] - time_str_var = reaches["time_str"].var - time_str_raw = permutedims(time_str_var[:]) - time_str = [join(time_str_raw[i, :]) for i in 1:size(time_str_raw, 1)] - + time = reaches["time"][:] + time_str = [string(t) for t in time] H[i, :], W[i, :], S[i, :], dA, Hr, Wr, Sr, time_str end @@ -86,7 +84,7 @@ Retrieve information about river reach cross sections. """ function river_info(id::Int, swordfile::String) Dataset(swordfile) do fd - g = NCDatasets.group(fd, "nodes") + g = fd.group["nodes"] i = findall(g["reach_id"][:] .== id) nid = g["node_id"][i] x = g["dist_out"][i] From 9a0e8ecf2ceeaf22bbaa3444443fdef2501177cc Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Mon, 6 Apr 2026 06:17:34 -0400 Subject: [PATCH 04/12] Fix posterior discharge array assignment --- swot.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/swot.jl b/swot.jl index 599add4..4493a75 100644 --- a/swot.jl +++ b/swot.jl @@ -194,8 +194,8 @@ function main() end A0 = missing n = missing - Qa = Array{Missing}(missing, 1, size(W, 2)) - Qu = Array{Missing}(missing, 1, size(W, 2)) + Qa = Matrix{Sad.FloatM}(missing, 1, size(W, 2)) + Qu = Matrix{Sad.FloatM}(missing, 1, size(W, 2)) if all(ismissing, H) || all(ismissing, W) || all(ismissing, S) println("$(reachid): INVALID") write_output(reachid, 0, outdir, A0, n, Qa, Qu, W, time_str) @@ -209,8 +209,8 @@ function main() res = Sad.infer(p, reach) A0 = Sad.compute_A0(reach, res.reach_ensemble) n = mean(res.reach_ensemble[1, :]) - Qa[1, :] = res.Q_post - Qu[1, :] = [isnothing(res.A_post[t]) ? NaN : std(exp.(res.A_post[t][1,:])) for t in 1:reach.nt] + Qa[1, :] = [isnan(q) ? missing : q for q in res.Q_post] + Qu[1, :] = [isnothing(res.A_post[t]) ? missing : std(res.A_post[t][1, :]) for t in 1:reach.nt] println("$(reachid): VALID") write_output(reachid, 1, outdir, A0, n, Qa, Qu, W, time_str) catch From 84b391a3ddd302d2f196dd2bd80545400c65712c Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Mon, 6 Apr 2026 06:18:20 -0400 Subject: [PATCH 05/12] Updated Sad.jl submodule --- Sad.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sad.jl b/Sad.jl index b47aa19..5728688 160000 --- a/Sad.jl +++ b/Sad.jl @@ -1 +1 @@ -Subproject commit b47aa195af327bd0a3fd555a9ab42790a3769efd +Subproject commit 57286885b93f8a510135e004756caaa71ddba0a7 From f09b915a54a8d036e2d8de71c3f5308aaab413ad Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Mon, 6 Apr 2026 15:34:41 -0400 Subject: [PATCH 06/12] Removed exception --- swot.jl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/swot.jl b/swot.jl index 4493a75..9aff7d3 100644 --- a/swot.jl +++ b/swot.jl @@ -185,13 +185,8 @@ function main() nids, x = river_info(reachid, swordfile) H, W, S, dA, Hr, Wr, Sr, time_str = read_swot_obs(swotfile, nids) - try - reach = Sad.preprocess(x, H, W, S) - catch e - if e isa MethodError - println("Error loading swot observation") - end - end + reach = Sad.preprocess(x, H, W, S) + A0 = missing n = missing Qa = Matrix{Sad.FloatM}(missing, 1, size(W, 2)) From 4f00af69cf725e5494d8ad8fdc9da37240297643 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Tue, 7 Apr 2026 02:14:35 -0400 Subject: [PATCH 07/12] Coalesce scalar values to handle missing data --- swot.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swot.jl b/swot.jl index 9aff7d3..eed7adf 100644 --- a/swot.jl +++ b/swot.jl @@ -114,9 +114,9 @@ function write_output(reachid, valid, outdir, A0, n, Qa, Qu, W, time_str) ridv = defVar(out, "reach_id", Int64, (), fillvalue = FILL) ridv[:] = reachid A0v = defVar(out, "A0", Float64, (), fillvalue = FILL) - A0v[:] = A0 + A0v[:] = coalesce(A0, FILL) nv = defVar(out, "n", Float64, (), fillvalue = FILL) - nv[:] = n + n_v[:] = coalesce(n, FILL) Qav = defVar(out, "Qa", Float64, ("nt",), fillvalue = FILL) Qav[:] = replace!(Qa, NaN=>FILL) Quv = defVar(out, "Q_u", Float64, ("nt",), fillvalue = FILL) From 9ed4c944dbc46d8ddcb47e52b89c9388994c1dd5 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Tue, 7 Apr 2026 02:17:04 -0400 Subject: [PATCH 08/12] Expand dependencies to make image building faster --- deps.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/deps.jl b/deps.jl index b726882..a5f3879 100644 --- a/deps.jl +++ b/deps.jl @@ -7,4 +7,10 @@ Pkg.add("Distributions") Pkg.add("NCDatasets") Pkg.add("JSON") Pkg.add("PyCall") +Pkg.add("DataInterpolations") +Pkg.add("DelimitedFiles") +Pkg.add("DifferentialEquations") +Pkg.add("LinearAlgebra") +Pkg.add("Statistics") +Pkg.add("ProgressMeter") Pkg.precompile() From e21139ced261fd6297057e1256b93ec448e9c3c5 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Tue, 7 Apr 2026 03:08:50 -0400 Subject: [PATCH 09/12] Fixed typo in variable --- swot.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swot.jl b/swot.jl index eed7adf..44d2480 100644 --- a/swot.jl +++ b/swot.jl @@ -116,7 +116,7 @@ function write_output(reachid, valid, outdir, A0, n, Qa, Qu, W, time_str) A0v = defVar(out, "A0", Float64, (), fillvalue = FILL) A0v[:] = coalesce(A0, FILL) nv = defVar(out, "n", Float64, (), fillvalue = FILL) - n_v[:] = coalesce(n, FILL) + nv[:] = coalesce(n, FILL) Qav = defVar(out, "Qa", Float64, ("nt",), fillvalue = FILL) Qav[:] = replace!(Qa, NaN=>FILL) Quv = defVar(out, "Q_u", Float64, ("nt",), fillvalue = FILL) From 5c69f07ee1d4d350f66074b04c052a8f8f6c22e3 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Tue, 7 Apr 2026 04:03:54 -0400 Subject: [PATCH 10/12] Reverted dependencies --- deps.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/deps.jl b/deps.jl index a5f3879..b726882 100644 --- a/deps.jl +++ b/deps.jl @@ -7,10 +7,4 @@ Pkg.add("Distributions") Pkg.add("NCDatasets") Pkg.add("JSON") Pkg.add("PyCall") -Pkg.add("DataInterpolations") -Pkg.add("DelimitedFiles") -Pkg.add("DifferentialEquations") -Pkg.add("LinearAlgebra") -Pkg.add("Statistics") -Pkg.add("ProgressMeter") Pkg.precompile() From c734a0627ae58b51902c67f88cca6f2ca1a96f6f Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Tue, 7 Apr 2026 04:53:29 -0400 Subject: [PATCH 11/12] Fixed missing prior and added time to inference --- Sad.jl | 2 +- swot.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Sad.jl b/Sad.jl index 5728688..277412f 160000 --- a/Sad.jl +++ b/Sad.jl @@ -1 +1 @@ -Subproject commit 57286885b93f8a510135e004756caaa71ddba0a7 +Subproject commit 277412f75a4b24aa303ddf2509772d6ce5258189 diff --git a/swot.jl b/swot.jl index 44d2480..995a750 100644 --- a/swot.jl +++ b/swot.jl @@ -196,12 +196,12 @@ function main() write_output(reachid, 0, outdir, A0, n, Qa, Qu, W, time_str) else p = Sad.priors(sosfile, reach.hmin, reachid) - if ismissing(p.Qp) + if ismissing(p) println("$(reachid): INVALID, missing mean discharge") write_output(reachid, 0, outdir, A0, n, Qa, Qu, W, time_str) else try - res = Sad.infer(p, reach) + res = Sad.infer(p, reach, time_str=time_str) A0 = Sad.compute_A0(reach, res.reach_ensemble) n = mean(res.reach_ensemble[1, :]) Qa[1, :] = [isnan(q) ? missing : q for q in res.Q_post] From 6a9bc5c9a8186578f2e45d3f3d1ae2d7def49b34 Mon Sep 17 00:00:00 2001 From: Kostas Andreadis Date: Tue, 7 Apr 2026 06:11:13 -0400 Subject: [PATCH 12/12] Updated Sad.jl submodule --- Sad.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sad.jl b/Sad.jl index 277412f..dfd8bba 160000 --- a/Sad.jl +++ b/Sad.jl @@ -1 +1 @@ -Subproject commit 277412f75a4b24aa303ddf2509772d6ce5258189 +Subproject commit dfd8bbaf5daa342d45e6c322139cac5bebf5b731