diff --git a/jenner-check/README.md b/jenner-check/README.md new file mode 100644 index 0000000..e0d8eb2 --- /dev/null +++ b/jenner-check/README.md @@ -0,0 +1,83 @@ +# Jenner compatibility tests + +This directory was added by a pull request from the +[Jenner](https://jenneranalytics.com) project. Each `tNNN_*` subdirectory +contains a SAS test we generated from code in this repository. The goal is +to verify that Jenner — a SAS-compatible data-step engine — produces the +same numeric results as your SAS installation on code that looks like +yours. + +## What's in here + +``` +jenner-check/ +├── README.md # this file +├── run_jenner_check.sas # master runner +├── jenner_check_report.csv # written by the runner +├── t001_…/ +│ ├── script.sas # the SAS script under test +│ ├── validate.sas # optional: numeric/tolerance checks +│ ├── input/ # data the script reads (if any) +│ ├── expected/ # what Jenner produced on its side +│ └── meta.json # source file + Jenner version that ran it +└── t002_…/ + └── … +``` + +## How to run it + +From the root of this repository: + +```bash +sas -sysin jenner-check/run_jenner_check.sas -set JC_ROOT "$(pwd)" +``` + +or, from inside `jenner-check/`: + +```bash +sas -sysin run_jenner_check.sas +``` + +The runner will: + +1. Find every `tNNN_*` bundle in this directory. +2. Run its `script.sas` with the log and listing captured to + `<bundle>/actual.log` and `<bundle>/actual.lst`. +3. If the bundle has a `validate.sas`, run that too. A validator produces + `work.jc_validation` with `status` and `message` columns. +4. Aggregate every test's outcome into `jenner_check_report.csv`. + +## How to report results + +Please attach `jenner-check/jenner_check_report.csv` as a comment on +the pull request that introduced this directory. 
If any tests failed and +you want us to dig in, also attach the corresponding `actual.log` and +`actual.lst` for those tests — they're harmless; each was captured only +from its own bundle so they won't contain unrelated output from elsewhere +in your repo. + +That's the whole ask. You don't need to merge anything else. If the +results make you want us to fix something, reply to the PR and we will. + +## Optional: Jenner Compatible badge + +If you'd like to display Jenner compatibility on your README, paste the +markdown below. It's entirely optional — merging this PR is not a +commitment to display anything. + +```markdown +[![Jenner Compatible](https://jenneranalytics.com/badges/jenner-compatible.svg)](https://jenneranalytics.com) +``` + +## Don't want future PRs from us? + +Reply to this PR with `no-more-prs` (case-insensitive) anywhere in a +comment, or open an issue titled `jenner-check: opt out`. We'll record +your repo as "do-not-contact" and stop automated PRs. + +## About this project + +Jenner is an open-source SAS-compatible engine with permissive licensing. +Full context is at [jenneranalytics.com](https://jenneranalytics.com). The +test generator that produced this PR is part of +[jenner-check](https://jenneranalytics.com/jenner-check). diff --git a/jenner-check/run_jenner.bat b/jenner-check/run_jenner.bat new file mode 100644 index 0000000..1039fdf --- /dev/null +++ b/jenner-check/run_jenner.bat @@ -0,0 +1,43 @@ +@echo off +rem run_jenner.bat - Windows runner for Jenner compatibility checks. +rem +rem Usage: run_jenner.bat <script.sas> [response.json] +rem +rem Submits a single .sas file to api.jenneranalytics.com. For +rem bundle-aware mode (autoexec.sas + script.sas concatenation) on +rem Windows, use WSL and invoke run_jenner.sh instead, or wait for the +rem Windows CI runner that will validate a bundle-aware .bat. +rem +rem Output: response.json contains the API response. 
Read it back in SAS:
+rem     filename resp 'response.json';
+rem     libname resp JSON fileref=resp;
+rem     proc print data=resp.root; run;
+rem
+rem Requires: curl.exe (ships with Windows 10+ at C:\Windows\System32).
+rem Exit codes: 0 response saved, 1 curl failed, 2 bad usage,
+rem             3 script file not found.
+
+setlocal
+
+if "%~1"=="" (
+    echo Usage: %~nx0 ^<script.sas^> [response.json]
+    exit /b 2
+)
+
+rem Quoted SET form: keeps paths containing spaces or shell metacharacters
+rem intact and never captures trailing whitespace into the value.
+set "SCRIPT=%~1"
+set "OUT=%~2"
+if "%OUT%"=="" set "OUT=response.json"
+
+rem Fail early with a readable message instead of a bare curl error.
+if not exist "%SCRIPT%" (
+    echo error: script not found: "%SCRIPT%" 1>&2
+    exit /b 3
+)
+
+set "HOST=api.jenneranalytics.com"
+
+rem Single multipart POST; the response body is written to the OUT file.
+curl.exe -sS -X POST "https://%HOST%/v1/run" ^
+    -F "script=@%SCRIPT%;type=application/x-sas" ^
+    -F "deterministic=1" ^
+    -F "timeout=60" ^
+    -o "%OUT%"
+
+if errorlevel 1 (
+    echo curl failed with errorlevel %errorlevel%
+    exit /b 1
+)
+
+echo Response written to %OUT%
+exit /b 0
diff --git a/jenner-check/run_jenner.sas b/jenner-check/run_jenner.sas
new file mode 100644
index 0000000..550e8f8
--- /dev/null
+++ b/jenner-check/run_jenner.sas
@@ -0,0 +1,526 @@
+/* run_jenner.sas — invoke api.jenneranalytics.com from base SAS.
+ *
+ * Requires SAS 9.4 M5 or later (PROC HTTP + libname JSON engine).
+ *
+ * ---------------------------------------------------------------------------
+ * TL;DR for SAS users:
+ *
+ *   %include 'run_jenner.sas';
+ *   %jenner_run(script=my_program.sas); / * one script * /
+ *   %jenner_check_all(); / * whole bundle dir * /
+ *
+ * ---------------------------------------------------------------------------
+ * What this file gives you:
+ *
+ *   %jenner_run       — POST one .sas file to the Jenner API, display the
+ *                       log + listing + any generated files.
+ *   %jenner_check_all — walk every jenner-check/tNNN_* bundle,
+ *                       invoke the API for each, compare the response to
+ *                       the bundle's expected.json, produce a summary
+ *                       CSV + SAS dataset the repo owner can attach to the
+ *                       jenner-check PR.
+ *
+ * ---------------------------------------------------------------------------
+ * How the API call is built:
+ *
+ *   POST https://api.jenneranalytics.com/v1/run
+ *   Content-Type: multipart/form-data; boundary=...
+ *
+ *   fields:
+ *     script          the .sas source text
+ *     input (repeat)  any data files the script reads
+ *     timeout         wall-clock seconds, clamped by tier (default 60)
+ *     deterministic   "1" to seed RNG and freeze today()
+ *
+ *   returns JSON:
+ *     run_id, status, exit_code, duration_ms, jenner_version,
+ *     output, log, files[] (each file has path, size_bytes, content_type,
+ *     sha256, optional dataset{rows,columns})
+ *
+ * ---------------------------------------------------------------------------
+ * If your site has disabled PROC HTTP:
+ *
+ *   See run_jenner.bat (Windows) or run_jenner.sh (mac/linux) in the same
+ *   directory — both are curl wrappers that produce the same JSON.
+ *   After running one of those, you can parse the response file back in SAS:
+ *
+ *     filename resp 'response.json';
+ *     libname resp JSON fileref=resp;
+ *     proc print data=resp.root; run;
+ */
+
+/* ---------- global options -------------------------------------------- */
+options nosource2 nonotes; /* quieter logs; turn on for debugging */
+
+/* ---------- module-scope macro variables (caller-visible results) ---- */
+%global JENNER_STATUS JENNER_RUN_ID JENNER_EXIT_CODE JENNER_VERSION;
+
+/* ====================================================================
+ * Internal helpers
+ * ==================================================================== */
+
+/* build a boundary string; SAS lacks a uuid primitive so we compose one
+ * from datetime + a random integer. RANUNI returns a fraction below 1 and
+ * HEXw. (w < 16) converts its argument to an integer before formatting,
+ * so the fraction is scaled to 0..16777215 first; formatting it directly
+ * would always print the same digits.
+ * Expands to text of the form jc_<yyyymmddThhmmss>_<6 hex digits>. */
+%macro _jc_boundary;
+  jc_%sysfunc(compress(%sysfunc(datetime(), b8601dt.), -:.))_%sysfunc(ceil(%sysevalf(%sysfunc(ranuni(0)) * 16777215)), hex6.)
+%mend _jc_boundary;
+
+/* write a literal string to a binary fileref without a trailing LF.
+ *   fref  fileref to append to (opened MOD, RECFM=N)
+ *   text  PUT specification, e.g. a quoted string */
+%macro _jc_put(fref, text);
+  data _null_;
+    file &fref mod recfm=n;
+    put &text;
+  run;
+%mend _jc_put;
+
+/* assemble the multipart body into fileref JC_BODY, producing a header
+ * line with the chosen boundary in macro var &JC_BOUND. Inputs is a
+ * space-separated list of file paths.
+ *
+ * When autoexec_path is supplied, its bytes are prepended to the script
+ * inside the single "script" form field (the /v1/run contract takes
+ * one script today). A newline separates the two so statements don't
+ * run together.
+ *
+ * Parameters:
+ *   script_path=    path of the .sas file to submit
+ *   autoexec_path=  optional file whose bytes go in front of the script
+ *   inputs=         optional space-separated list of data file paths
+ *   timeout=        value written to the "timeout" form field
+ *   deterministic=  value written to the "deterministic" form field
+ *
+ * Side effects: (re)assigns fileref JC_BODY and sets global &JC_BOUND.
+ *
+ * NOTE(review): every FILE statement below uses RECFM=N (byte stream, no
+ * record terminators). Confirm that the "/" pointer controls and the bare
+ * PUT really emit the line breaks multipart/form-data needs; that logic
+ * is left exactly as it was. */
+%macro _jc_build_body(script_path=, autoexec_path=, inputs=, timeout=60, deterministic=0);
+  %global JC_BOUND;
+  /* RANUNI yields a fraction below 1 and HEXw. (w < 16) formats the
+   * integer part only, so scale to a 31-bit integer before formatting. */
+  %let JC_BOUND = --jenner-%sysfunc(ceil(%sysevalf(%sysfunc(ranuni(0)) * 2147483647)), hex10.)--;
+
+  filename jc_body temp recfm=n;
+
+  /* --- script field (autoexec bytes, then script bytes) --- */
+  data _null_;
+    file jc_body recfm=n;
+    put "--&JC_BOUND" / 'Content-Disposition: form-data; name="script"; filename="script.sas"' /
+        'Content-Type: application/x-sas' / ;
+  run;
+  %if %length(&autoexec_path) > 0 %then %do;
+    data _null_;
+      infile "&autoexec_path" recfm=n;
+      file jc_body mod recfm=n;
+      input;
+      put _infile_;
+    run;
+    data _null_;
+      file jc_body mod recfm=n;
+      put ; /* separator newline */
+    run;
+  %end;
+  /* append raw script bytes */
+  data _null_;
+    infile "&script_path" recfm=n;
+    file jc_body mod recfm=n;
+    input;
+    put _infile_;
+  run;
+  data _null_;
+    file jc_body mod recfm=n;
+    put ;
+  run;
+
+  /* --- optional input files --- */
+  %local i f;
+  %let i = 1;
+  %do %while (%scan(&inputs, &i, %str( )) ne );
+    %let f = %scan(&inputs, &i, %str( ));
+    data _null_;
+      file jc_body mod recfm=n;
+      fname = scan("&f", -1, '/\');   /* basename only */
+      put "--&JC_BOUND" /
+          'Content-Disposition: form-data; name="input"; filename="' fname +(-1) '"' /
+          'Content-Type: application/octet-stream' / ;
+    run;
+    data _null_;
+      infile "&f" recfm=n;
+      file jc_body mod recfm=n;
+      input;
+      put _infile_;
+    run;
+    data _null_;
+      file jc_body mod recfm=n;
+      put ;
+    run;
+    %let i = %eval(&i + 1);
+  %end;
+
+  /* --- timeout + deterministic fields --- */
+  data _null_;
+    file jc_body mod recfm=n;
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="timeout"' / /
+        "&timeout";
+    put "--&JC_BOUND" /
+        'Content-Disposition: form-data; name="deterministic"' / /
+        "&deterministic";
+    put "--&JC_BOUND--";
+  run;
+%mend _jc_build_body;
+
+
+/* ====================================================================
+ * %jenner_run — submit one script, display results.
+ *
+ *   script=         path of the .sas file to submit (required)
+ *   autoexec=       optional file prepended to the script
+ *   inputs=         optional space-separated list of data files
+ *   host=           API host name
+ *   timeout=        forwarded to the "timeout" form field
+ *   deterministic=  forwarded to the "deterministic" form field
+ *   out_dir=        directory that receives files listed in the response
+ *   api_key=        optional bearer token
+ *
+ * Results are published in the global macro variables JENNER_STATUS,
+ * JENNER_RUN_ID, JENNER_EXIT_CODE and JENNER_VERSION; all four stay blank
+ * when no parsable response came back.
+ * ==================================================================== */
+%macro jenner_run(
+  script=,
+  autoexec=,
+  inputs=,
+  host=api.jenneranalytics.com,
+  timeout=60,
+  deterministic=0,
+  out_dir=jenner_output,
+  api_key=
+);
+
+  %let JENNER_STATUS = ;
+  %let JENNER_RUN_ID = ;
+  %let JENNER_EXIT_CODE = ;
+  %let JENNER_VERSION = ;
+
+  %if %length(&script) = 0 %then %do;
+    %put ERROR: %%jenner_run requires script=;
+    %return;
+  %end;
+  %if %sysfunc(fileexist(&script)) = 0 %then %do;
+    %put ERROR: script not found: &script;
+    %return;
+  %end;
+  /* Nested on purpose: %sysfunc resolves before the %if condition is
+   * evaluated, so joining the two tests with "and" would call FILEEXIST
+   * with no argument whenever autoexec= is blank. */
+  %if %length(&autoexec) > 0 %then %do;
+    %if %sysfunc(fileexist(&autoexec)) = 0 %then %do;
+      %put ERROR: autoexec not found: &autoexec;
+      %return;
+    %end;
+  %end;
+
+  %_jc_build_body(script_path=&script, autoexec_path=&autoexec,
+                  inputs=&inputs,
+                  timeout=&timeout, deterministic=&deterministic)
+
+  filename jc_resp temp;
+  filename jc_hdrs temp;
+
+  proc http
+    method = "POST"
+    url = "https://&host/v1/run"
+    in = jc_body
+    out = jc_resp
+    headerout = jc_hdrs
+    ct = "multipart/form-data; boundary=&JC_BOUND"
+    ;
+    /* bearer token only when the caller supplied one */
+    %if %length(&api_key) > 0 %then %do;
+      headers "Authorization" = "Bearer &api_key";
+    %end;
+  run;
+
+  /* Surface transport-level failures; parsing is still attempted because
+   * an error reply may carry a JSON body. */
+  %if %symexist(SYS_PROCHTTP_STATUS_CODE) %then %do;
+    %if &SYS_PROCHTTP_STATUS_CODE ne 200 %then %do;
+      %put WARNING: Jenner API answered HTTP &SYS_PROCHTTP_STATUS_CODE (expected 200).;
+    %end;
+  %end;
+
+  /* parse response JSON */
+  libname jc_r JSON fileref=jc_resp;
+
+  /* Everything that reads jc_r.root sits behind one existence check so an
+   * unparsable reply produces one clear message, not a failed DATA step. */
+  %if %sysfunc(exist(jc_r.root)) %then %do;
+    /* extract headline values into caller-visible macro variables */
+    data _null_;
+      set jc_r.root(obs=1);
+      call symputx('JENNER_RUN_ID', run_id, 'G');
+      call symputx('JENNER_STATUS', status, 'G');
+      call symputx('JENNER_EXIT_CODE', exit_code, 'G');
+      call symputx('JENNER_VERSION', jenner_version, 'G');
+    run;
+
+    /* show the listing (stdout) and the log, one PUT per LF-separated line */
+    data _null_;
+      set jc_r.root(obs=1);
+      length line $32767;
+      put '==== Jenner output =====================================';
+      do i = 1 to countc(output, '0A'x) + 1;
+        line = scan(output, i, '0A'x);
+        put line;
+      end;
+      put '==== Jenner log ========================================';
+      do i = 1 to countc(log, '0A'x) + 1;
+        line = scan(log, i, '0A'x);
+        put line;
+      end;
+      put "==== run_id=&JENNER_RUN_ID status=&JENNER_STATUS exit=&JENNER_EXIT_CODE version=&JENNER_VERSION";
+    run;
+  %end;
+  %else %do;
+    %put ERROR: no parsable JSON response from &host (jc_r.root is missing).;
+  %end;
+
+  /* download any returned files into &out_dir/{relative/path} */
+  %if %sysfunc(exist(jc_r.files)) %then %do;
+    /* Create &out_dir without shelling out: DLCREATEDIR makes LIBNAME
+     * create the final path component, on any host and under NOXCMD.
+     * The old cats('mkdir -p ', ...) call also dropped the blank before
+     * the directory name. The previous option value is restored.
+     * NOTE(review): only the last directory level is created, and
+     * sub-directories inside a returned path are not created either. */
+    %local _dlopt;
+    %let _dlopt = %sysfunc(getoption(dlcreatedir));
+    options dlcreatedir;
+    libname _jcout "&out_dir";
+    libname _jcout clear;
+    options &_dlopt;
+
+    %local _nfiles;
+    proc sql noprint;
+      select count(*) into :_nfiles trimmed from jc_r.files;
+    quit;
+
+    %local i fpath fsafe;
+    %do i = 1 %to &_nfiles;
+      %let fpath = ;
+      %let fsafe = 0;
+      data _null_;
+        set jc_r.files(firstobs=&i obs=&i);
+        call symputx('fpath', path, 'L');
+        /* path is server-supplied: accept only relative paths that
+         * cannot climb out of &out_dir */
+        unsafe = (index(path, '..') > 0)
+              or (char(path, 1) in ('/', '\'))
+              or (char(path, 2) = ':');
+        call symputx('fsafe', not unsafe, 'L');
+      run;
+      %if &fsafe %then %do;
+        filename jc_file "&out_dir/&fpath";
+        proc http
+          url="https://&host/v1/run/&JENNER_RUN_ID/files/&fpath"
+          out=jc_file
+          method="GET";
+          %if %length(&api_key) > 0 %then %do;
+            headers "Authorization" = "Bearer &api_key";
+          %end;
+        run;
+        filename jc_file clear;
+        %put NOTE: saved &out_dir/&fpath;
+      %end;
+      %else %do;
+        %put WARNING: skipped file with unsafe path from server: %superq(fpath);
+      %end;
+    %end;
+  %end;
+
+  libname jc_r clear;
+  filename jc_resp clear;
+  filename jc_hdrs clear;
+  filename jc_body clear;
+%mend jenner_run;
+
+
+/* ====================================================================
+ * %jenner_list — show the bundles visible in &dir and how to run them.
+ *                Advertised by the %include-time banner at the bottom
+ *                of this file and called by %jenner_check_all when
+ *                &dir has no bundles.
+ *
+ *   dir=  directory to scan (default jenner-check, relative to the cwd)
+ * ==================================================================== */
+%macro jenner_list(dir=jenner-check);
+  %local _n;
+  %let _n = 0;
+  filename jcld "&dir";
+  data work._jc_list;
+    length bundle $256;
+    did = dopen('jcld');
+    if did = 0 then do;
+      /* -1 signals "directory could not be opened" to the macro code below */
+      call symputx('_n', -1, 'L');
+      stop;
+    end;
+    n = dnum(did);
+    do i = 1 to n;
+      name = dread(did, i);
+      /* tNNN_* only, so other entries that merely start with "t" are
+       * not reported as bundles */
+      if prxmatch('/^t\d+_/', name) then do;
+        bundle = name;
+        output;
+      end;
+    end;
+    rc = dclose(did);
+    keep bundle;
+  run;
+  filename jcld clear;
+
+  /* Double quotes below: macro references inside single quotes are not
+   * resolved, which printed the literal text '&dir'. */
+  %if &_n = -1 %then %do;
+    %put NOTE: No directory "&dir" — are you at the repo root? Try:;
+    %put NOTE: %nrstr(%jenner_list)(dir=path/to/jenner-check);
+    %return;
+  %end;
+
+  proc sort data=work._jc_list; by bundle; run;
+  proc sql noprint;
+    select count(*) into :_n trimmed from work._jc_list;
+  quit;
+
+  %if &_n = 0 %then %do;
+    %put NOTE: No tNNN_* bundles found in "&dir".;
+    %return;
+  %end;
+
+  %put;
+  %put ======================================================================;
+  %put &_n bundle(s) in &dir:;
+  data _null_;
+    set work._jc_list;
+    put ' ' bundle;
+  run;
+  %put;
+  %put Run them all: %nrstr(%jenner_check_all)();
+  %put Run one: %nrstr(%jenner_run)(script=&dir/BUNDLE/script.sas, autoexec=&dir/BUNDLE/autoexec.sas);
+  %put ======================================================================;
+%mend jenner_list;
+
+
+/* ====================================================================
+ * %jenner_check_all — run every tNNN_ bundle through %jenner_run and
+ *                     write a CSV summary the owner can attach to the PR.
+ *
+ *   dir=      directory holding the bundles
+ *   host=     API host, forwarded to %jenner_run
+ *   api_key=  optional bearer token, forwarded to %jenner_run
+ *   report=   CSV file name, written inside &dir
+ *
+ * Leaves work.jc_bundles (bundle paths) and work.jc_results (one row
+ * per bundle: bundle, status, message, run_id) behind for inspection.
+ * ==================================================================== */
+%macro jenner_check_all(
+  dir=jenner-check,
+  host=api.jenneranalytics.com,
+  api_key=,
+  report=jenner_check_report.csv
+);
+
+  /* enumerate tNNN_* subdirs */
+  filename jcd "&dir";
+  data work.jc_bundles;
+    length bundle $256;
+    did = dopen('jcd');
+    if did = 0 then do;
+      /* The hint is single-quoted: inside double quotes the macro
+       * processor would execute %jenner_list in the middle of this step. */
+      put "ERROR: cannot open &dir — are you at the repo root? "
+          'Try %jenner_list(dir=path/to/jenner-check);';
+      stop;
+    end;
+    n = dnum(did);
+    do i = 1 to n;
+      name = dread(did, i);
+      /* same tNNN_* filter as %jenner_list */
+      if prxmatch('/^t\d+_/', name) then do;
+        bundle = cats("&dir", '/', name);
+        output;
+      end;
+    end;
+    rc = dclose(did);
+    keep bundle;
+  run;
+  filename jcd clear;
+  proc sort data=work.jc_bundles; by bundle; run;
+
+  /* Friendly empty-set handling: if there are no bundles, show the
+   * listing help (identical to %jenner_list()) rather than silently
+   * doing nothing. */
+  %local nb;
+  proc sql noprint; select count(*) into :nb trimmed from work.jc_bundles; quit;
+  %if &nb = 0 %then %do;
+    %put NOTE: No tNNN_* bundles under "&dir". Nothing to run.;
+    %jenner_list(dir=&dir)
+    %return;
+  %end;
+
+  /* result accumulator */
+  data work.jc_results;
+    length bundle $256 status $16 message $512 run_id $48;
+    stop;
+  run;
+
+  %local i b;
+  %do i = 1 %to &nb;
+    data _null_;
+      set work.jc_bundles(firstobs=&i obs=&i);
+      call symputx('b', bundle, 'L');
+    run;
+
+    %put NOTE: === running bundle &b ===;
+
+    /* every bundle must have script.sas; autoexec.sas is optional
+     * jenner-check bookkeeping (e.g. `options obs=100;` + any owner
+     * autoexec inlined). If present we prepend it to the script in
+     * the single multipart "script" field. Script.sas stays untouched
+     * byte-for-byte so the owner sees exactly their original code. */
+    %local sc ax;
+    %let sc = &b/script.sas;
+    %if %sysfunc(fileexist(&b/autoexec.sas)) %then %let ax = &b/autoexec.sas;
+    %else %let ax = ;
+
+    %jenner_run(script=&sc, autoexec=&ax, host=&host, api_key=&api_key,
+                out_dir=&b/actual)
+
+    /* Verdict — minimal on purpose: expected.json only has to exist and
+     * parse; pass/fail is decided solely by the exit code the API
+     * reported for this run. Nothing in expected.json (status,
+     * log_contains, file hashes) is compared yet.
+     * A richer validator can live alongside expected.json as
+     * validate.sas (SAS-side) but isn't required.
+     */
+    %local verdict msg;
+    %let verdict = unknown;
+    %let msg = no expected.json;
+    %if %sysfunc(fileexist(&b/expected.json)) %then %do;
+      filename jcexp "&b/expected.json";
+      libname jcexp JSON fileref=jcexp;
+
+      data _null_;
+        if 0 then set jcexp.root;   /* compile-time reference only; no rows are read */
+        if "&JENNER_EXIT_CODE" = "0" then do;
+          call symputx('verdict', 'pass', 'L');
+          call symputx('msg', cats('exit=0 run_id=', "&JENNER_RUN_ID"), 'L');
+        end;
+        else do;
+          call symputx('verdict', 'fail', 'L');
+          call symputx('msg', cats('exit=', "&JENNER_EXIT_CODE"), 'L');
+        end;
+      run;
+
+      libname jcexp clear;
+      filename jcexp clear;
+    %end;
+
+    data work._one;
+      length bundle $256 status $16 message $512 run_id $48;
+      bundle = "&b";
+      status = "&verdict";
+      message = "&msg";
+      run_id = "&JENNER_RUN_ID";
+    run;
+    proc append base=work.jc_results data=work._one force; run;
+  %end;
+
+  /* write CSV report */
+  proc export data=work.jc_results
+    outfile="&dir/&report"
+    dbms=csv replace;
+  run;
+
+  /* summary block in the SAS log */
+  data _null_;
+    set work.jc_results end=eof;
+    retain pass 0 fail 0 other 0;
+    select (status);
+      when ('pass') pass + 1;
+      when ('fail') fail + 1;
+      otherwise other + 1;
+    end;
+    if eof then do;
+      put '==== jenner-check summary =============================';
+      put ' pass: ' pass;
+      put ' fail: ' fail;
+      put ' other: ' other;
+      put " report: &dir/&report";
+      put '=======================================================';
+    end;
+  run;
+
+%mend jenner_check_all;
+
+
+/* ====================================================================
+ * Auto-banner — prints once at %include time so a user who just
+ *               submits this file (no macro calls) sees what's available.
+ *               Suppressed if %let JENNER_QUIET = 1; before %include.
+ *
+ * Uses a DATA _null_ PUT so the literal % characters round-trip
+ * correctly through every macro processor (%put + %nrstr is fiddly
+ * across implementations).
+ * ==================================================================== */
+%macro _jc_banner;
+  /* Opt-out: stay silent when the caller set JENNER_QUIET to 1 beforehand. */
+  %local _quiet;
+  %let _quiet = 0;
+  %if %symexist(JENNER_QUIET) %then %do;
+    %if %superq(JENNER_QUIET) = 1 %then %let _quiet = 1;
+  %end;
+  %if &_quiet %then %return;
+
+  /* The percent sign is produced with byte(37) and glued on with cats(),
+   * never written inside a literal, so no macro processor gets a chance
+   * to expand a macro name while the PUT statements are compiled. */
+  data _null_;
+    length pct $1 cmd $200;
+    pct = byte(37);
+
+    put ' ';
+    put '======================================================================';
+    put ' Jenner-check runner loaded.';
+    put ' ';
+    put ' In your SAS session, try:';
+
+    cmd = cats(pct, 'jenner_check_all();');
+    put ' ' cmd ' run every bundle + CSV report';
+    cmd = cats(pct, 'jenner_list();');
+    put ' ' cmd ' list bundles found';
+    cmd = cats(pct, 'jenner_run(script=path);');
+    put ' ' cmd ' run one script';
+
+    put ' ';
+    put ' Default directory is ./jenner-check (override with dir= option).';
+    put ' ';
+
+    cmd = cats(pct, 'let JENNER_QUIET=1;');
+    put ' To suppress this banner, run ' cmd ' BEFORE including this file.';
+    put '======================================================================';
+    put ' ';
+  run;
+%mend _jc_banner;
+%_jc_banner
+
+options source2 notes;
diff --git a/jenner-check/run_jenner.sh b/jenner-check/run_jenner.sh
new file mode 100755
index 0000000..99cd395
--- /dev/null
+++ b/jenner-check/run_jenner.sh
@@ -0,0 +1,214 @@
+#!/usr/bin/env bash
+# run_jenner.sh - mac/linux runner for Jenner compatibility checks.
+#
+# Quick start:
+#   cd jenner-check/
+#   ./run_jenner.sh                  # lists bundles in the current dir
+#   ./run_jenner.sh t001_something   # run that one
+#   ./run_jenner.sh --all            # run every bundle in the current dir
+#
+# Usage: ./run_jenner.sh [bundle-dir | script.sas | --all | --list] [response.json]
+#
+#   (no arg)     If the current directory has tNNN_* bundles, list them
+#                with a copy-paste command. Otherwise show this help.
+#
+#   --all        Run every tNNN_* bundle in the current directory in
+#                sequence, print a pass/fail summary.
+#
+#   --list, -l   List the bundles visible in the current directory and
+#                exit without running anything.
+#
+#   bundle-dir   A directory containing script.sas and (optionally)
+#                autoexec.sas. The two are concatenated (autoexec first,
+#                then a blank line, then script) and submitted together.
+#                This is the normal case.
+#
+#   script.sas   A single .sas file. Submitted as-is — no autoexec.
+#
+# The API response is written to the file named by the second argument
+# (or response.json in the current directory if omitted) and the most
+# useful fields are also printed to stdout for a quick sanity check.
+#
+# Requires: bash 4+ (for mapfile) and curl. Mainstream Linux distros ship
+# both. macOS ships curl but only bash 3.2, so install a newer bash there
+# (e.g. via Homebrew). Windows: use run_jenner.bat (single-file mode) or WSL.
+#
+# IMPORTANT: execute this script, don't source it. Running with `. ./...`
+# or `source ./...` will short-circuit error handling and can close your
+# terminal if an error path fires.
+
+# --- refuse to be sourced ------------------------------------------------
+# `return` only works inside a sourced script. If we ARE sourced, print a
+# message and return 1 so we don't kill the parent shell with exit. If
+# we're running directly, (return 0) fails and we fall through.
+(return 0 2>/dev/null) && {
+  printf 'run_jenner.sh: execute this script, do not source it.\n  ./run_jenner.sh <bundle-dir>\n' >&2
+  return 1
+}
+
+# Abort on any failing command (-e) and on use of an unset variable (-u).
+set -eu
+
+# --- helpers -------------------------------------------------------------
+# Emit the list of tNNN_* bundles in the current working directory. A
+# "bundle" is a directory matching t[0-9]*_* that contains a script.sas
+# file. Writes one path per line, each with a leading "./"; empty output
+# if nothing found.
+list_bundles_here() {
+  local d
+  # An unmatched glob stays literal; the -d test below filters it out.
+  for d in ./t[0-9]*_*/ ; do
+    [[ -d "$d" && -f "$d/script.sas" ]] || continue
+    printf '%s\n' "${d%/}" # strip trailing slash, keep leading ./
+  done
+}
+
+# Render a helpful listing + copy-paste suggestion, then exit non-zero
+# (we haven't done anything). Used when the user runs with no args.
+# Callers must make sure at least one bundle exists: the first entry is
+# used for the example command.
+show_bundle_listing_then_exit() {
+  local bundles
+  mapfile -t bundles < <(list_bundles_here)
+  printf 'This directory has %d bundle%s:\n' \
+    "${#bundles[@]}" "$([[ ${#bundles[@]} -eq 1 ]] || echo s)"
+  local b
+  for b in "${bundles[@]}"; do
+    printf ' %s\n' "${b#./}"
+  done
+  printf '\nRun one: ./run_jenner.sh %s\n' "${bundles[0]#./}"
+  printf 'Run them all: ./run_jenner.sh --all\n'
+  printf 'Just list: ./run_jenner.sh --list\n'
+  exit 2
+}
+
+# Show the usage block when we have nothing better to offer.
+#   $1  exit status to use (default 2)
+show_usage_then_exit() {
+  local status=${1:-2}
+  {
+    printf 'Usage: %s [bundle-dir | script.sas | --all | --list] [response.json]\n\n' "$(basename "$0")"
+    printf 'Examples:\n'
+    printf ' %s t001_my_bundle # run one bundle\n' "$(basename "$0")"
+    printf ' %s --all # run every tNNN_* bundle in this dir\n' "$(basename "$0")"
+    printf ' %s path/to/script.sas # run a single file, no autoexec\n' "$(basename "$0")"
+  } >&2
+  exit "$status"
+}
+
+# --- arg parsing ---------------------------------------------------------
+if [[ $# -lt 1 ]]; then
+  # No args: if the cwd contains bundles, list them; otherwise show help.
+  mapfile -t _found < <(list_bundles_here)
+  if [[ ${#_found[@]} -gt 0 ]]; then
+    show_bundle_listing_then_exit
+  fi
+  show_usage_then_exit 2
+fi
+
+# API host; override with the JENNER_HOST environment variable.
+HOST=${JENNER_HOST:-api.jenneranalytics.com}
+
+case "$1" in
+  -h|--help)
+    show_usage_then_exit 0
+    ;;
+  -l|--list)
+    mapfile -t _found < <(list_bundles_here)
+    if [[ ${#_found[@]} -eq 0 ]]; then
+      printf 'No tNNN_* bundles found in %s\n' "$(pwd)"
+      exit 0
+    fi
+    printf 'Bundles in %s:\n' "$(pwd)"
+    for b in "${_found[@]}"; do
+      printf ' %s\n' "${b#./}"
+    done
+    exit 0
+    ;;
+  --all)
+    mapfile -t _found < <(list_bundles_here)
+    if [[ ${#_found[@]} -eq 0 ]]; then
+      printf 'No tNNN_* bundles found in %s\n' "$(pwd)" >&2
+      exit 3
+    fi
+    _pass=0; _fail=0
+    for b in "${_found[@]}"; do
+      printf '\n── %s ──\n' "${b#./}"
+      # Re-run ourselves through the current interpreter: "$0" alone is
+      # not executable/resolvable when started as `bash run_jenner.sh`.
+      # "pass" means the child exited 0, i.e. the API answered HTTP 200.
+      if "${BASH:-bash}" "$0" "$b" "${b#./}_response.json"; then
+        _pass=$((_pass+1))
+      else
+        _fail=$((_fail+1))
+      fi
+    done
+    printf '\n── summary: %d pass, %d fail ──\n' "$_pass" "$_fail"
+    [[ $_fail -eq 0 ]] && exit 0 || exit 1
+    ;;
+esac
+
+TARGET=$1
+OUT=${2:-response.json}
+
+# --- assemble the submission body ---------------------------------------
+# If TARGET is a directory, treat it as a bundle. If it's a file, submit
+# it directly.
+CLEANUP=()
+cleanup() {
+  local f
+  # The ${arr[@]+...} form expands to nothing for an empty array. A plain
+  # "${CLEANUP[@]}" is an "unbound variable" error under `set -u` on bash
+  # older than 4.4, which fired in single-file mode where nothing is queued.
+  for f in ${CLEANUP[@]+"${CLEANUP[@]}"}; do rm -f -- "$f"; done
+}
+trap cleanup EXIT
+
+if [[ -d "$TARGET" ]]; then
+  if [[ ! -f "$TARGET/script.sas" ]]; then
+    printf 'error: %s is a directory but has no script.sas\n' "$TARGET" >&2
+    exit 3
+  fi
+  # Concatenate autoexec (if any), a newline, then the script into a temp
+  # file that the EXIT trap removes.
+  SUBMIT=$(mktemp -t jc_submit.XXXXXX.sas)
+  CLEANUP+=("$SUBMIT")
+  if [[ -f "$TARGET/autoexec.sas" ]]; then
+    cat "$TARGET/autoexec.sas" > "$SUBMIT"
+    printf '\n' >> "$SUBMIT"
+  fi
+  cat "$TARGET/script.sas" >> "$SUBMIT"
+  printf 'Submitting bundle: %s\n' "$TARGET"
+  if [[ -f "$TARGET/autoexec.sas" ]]; then
+    printf ' autoexec.sas (%d bytes) + script.sas (%d bytes)\n' \
+      "$(wc -c < "$TARGET/autoexec.sas")" "$(wc -c < "$TARGET/script.sas")"
+  else
+    printf ' script.sas (%d bytes), no autoexec\n' "$(wc -c < "$TARGET/script.sas")"
+  fi
+elif [[ -f "$TARGET" ]]; then
+  SUBMIT=$TARGET
+  printf 'Submitting file: %s (%d bytes)\n' "$TARGET" "$(wc -c < "$TARGET")"
+else
+  printf 'error: %s is neither a file nor a directory\n' "$TARGET" >&2
+  exit 3
+fi
+
+# --- POST ---------------------------------------------------------------
+# The response body goes to $OUT; only the HTTP status code is captured.
+printf 'POST https://%s/v1/run ... ' "$HOST"
+HTTP_CODE=$(curl -sS -o "$OUT" -w '%{http_code}' -X POST \
+  "https://${HOST}/v1/run" \
+  -F "script=@${SUBMIT};type=application/x-sas" \
+  -F "deterministic=1" \
+  -F "timeout=60")
+printf 'HTTP %s\n' "$HTTP_CODE"
+
+if [[ "$HTTP_CODE" != "200" ]]; then
+  printf 'API returned non-200 — raw response in %s\n' "$OUT" >&2
+  exit 4
+fi
+
+# --- summarise ----------------------------------------------------------
+# Best-effort: use python3 if present, otherwise point at the raw JSON.
+printf 'Response written to %s\n' "$OUT"
+if ! command -v python3 >/dev/null 2>&1; then
+  # No interpreter available: just tell the user where the JSON went.
+  printf ' (install python3 for a pretty summary; raw JSON in %s)\n' "$OUT"
+else
+  # Print the headline fields, then at most the first ten log lines.
+  python3 - "$OUT" <<'PY'
+import json
+import sys
+
+with open(sys.argv[1]) as fh:
+    resp = json.load(fh)
+
+for label, key in (
+    (" status : ", "status"),
+    (" exit_code : ", "exit_code"),
+    (" duration_ms: ", "duration_ms"),
+    (" run_id : ", "run_id"),
+    (" jenner_ver : ", "jenner_version"),
+):
+    print(f"{label}{resp.get(key)}")
+
+log_text = resp.get('log', '')
+if log_text:
+    print(' log (first 10 lines):')
+    for entry in log_text.splitlines()[:10]:
+        print(f' {entry}')
+PY
+fi
diff --git a/jenner-check/run_jenner_check.sas b/jenner-check/run_jenner_check.sas
new file mode 100644
index 0000000..0972449
--- /dev/null
+++ b/jenner-check/run_jenner_check.sas
@@ -0,0 +1,212 @@
+/* run_jenner_check.sas — Jenner compatibility test runner
+ *
+ * Usage (from the repo root):
+ *   sas -sysin jenner-check/run_jenner_check.sas -set JC_ROOT "$(pwd)"
+ * or, if invoked from jenner-check/ directly:
+ *   sas -sysin run_jenner_check.sas
+ *
+ * What it does:
+ *   1. Enumerates every subdirectory of jenner-check/ whose name starts
+ *      with "t" (t001_…, t002_…, …). Those are individual test bundles.
+ *   2. For each bundle:
+ *      a. Redirects the log and listing to bundle-local files
+ *         (actual.log, actual.lst) so we can attach or diff them later.
+ *      b. %includes script.sas.
+ *      c. If validate.sas exists, %includes it. The validator is expected
+ *         to produce a single-row dataset work.jc_validation with columns
+ *         status $8 ("pass"/"fail") and message $256.
+ *      d. Restores the default log + listing destinations.
+ *      e. Appends one row to work.jc_results.
+ *   3. Writes jenner-check/jenner_check_report.csv with one row per
+ *      test and prints a summary listing.
+ *
+ * The test contract (what the test generator must produce in each bundle):
+ *
+ *   jenner-check/tNNN_name/
+ *     script.sas     required   the script under test
+ *     validate.sas   optional   produces work.jc_validation
+ *     input/         optional   data files the script reads
+ *     expected/      optional   reference output we hoped for
+ *     meta.json      optional   {source_file, jenner_version, tier}
+ *
+ * Design notes:
+ *   - Portable across UNIX and Windows SAS (no pipe/x commands).
+ *   - Each test's log/listing is captured separately so the owner can ship
+ *     us just the failures without leaking unrelated output.
+ *   - We never fail the *runner* on a test failure. We just record it.
+ *   - If validate.sas is missing we record status="no_validator" — owner can
+ *     still attach the report to the PR; we treat that as "partial signal."
+ */
+
+/* JC_ROOT comes from the environment (sas -set JC_ROOT ...). ENVLEN is
+ * checked first so SYSGET is never called on an undefined variable.
+ * When JC_ROOT is absent, JC_TESTS_DIR is left blank and the resolver
+ * below probes the current directory. It used to be pre-filled with the
+ * WORK path, which always exists, so the fallback never ran and the WORK
+ * directory was scanned for bundles. */
+%let JC_ROOT = ;
+%let JC_TESTS_DIR = ;
+%if %sysfunc(envlen(JC_ROOT)) > 0 %then %do;
+  %let JC_ROOT = %sysfunc(sysget(JC_ROOT));
+  %let JC_TESTS_DIR = &JC_ROOT/jenner-check;
+%end;
+
+/* Fallback discovery: allow invocation from the jenner-check dir itself.
+ * Keeps &JC_TESTS_DIR when it names an existing path; otherwise tries
+ * ./jenner-check and finally the current directory. Updates the global
+ * JC_TESTS_DIR in place. */
+%macro jc_resolve_tests_dir;
+  %local candidate found;
+  %let candidate = &JC_TESTS_DIR;
+  %let found = 0;
+  /* FILEEXIST needs an argument, so only call it for a non-blank path */
+  %if %length(&candidate) > 0 %then %let found = %sysfunc(fileexist(&candidate));
+  %if &found = 0 %then %do;
+    /* Try cwd/jenner-check, then cwd */
+    %let candidate = jenner-check;
+    %if %sysfunc(fileexist(&candidate)) = 0 %then %let candidate = .;
+  %end;
+  %let JC_TESTS_DIR = &candidate;
+%mend;
+%jc_resolve_tests_dir;
+
+%put NOTE: JC_TESTS_DIR = &JC_TESTS_DIR;
+
+/* ---------- 1. 
Enumerate test bundle directories -------------------- */
+/* NOTE(review): fileref jc_dir is not referenced anywhere below in this
+ * file; the DATA step assigns its own 'jcd' fileref. Kept as-is. */
+filename jc_dir "&JC_TESTS_DIR";
+
+/* Builds work.jc_tests: one row (test_name) per child directory of
+ * &JC_TESTS_DIR whose name starts with "t". */
+data work.jc_tests;
+  length test_name $64;
+  rc = filename('jcd', "&JC_TESTS_DIR");
+  did = dopen('jcd');
+  if did = 0 then do;
+    put "ERROR: Cannot open &JC_TESTS_DIR";
+    stop;
+  end;
+  n = dnum(did);
+  do i = 1 to n;
+    name = dread(did, i);
+    /* Only directories whose name starts with "t" (t001_…, t002_…) */
+    if substr(name, 1, 1) = 't' then do;
+      /* DOPEN succeeds only for directories, which filters out files */
+      child_fref = 'jcchild';
+      rc2 = filename(child_fref, cats("&JC_TESTS_DIR", '/', name));
+      cdid = dopen(child_fref);
+      if cdid > 0 then do;
+        test_name = name;
+        output;
+        rc2 = dclose(cdid);
+      end;
+      rc2 = filename(child_fref);
+    end;
+  end;
+  rc = dclose(did);
+  rc = filename('jcd');
+  keep test_name;
+run;
+
+proc sort data=work.jc_tests; by test_name; run;
+
+/* ---------- 2. Per-test runner macro -------------------------------- */
+/* %jc_run_one(dir) — run one bundle and append its outcome.
+ *   dir  bundle directory name relative to &JC_TESTS_DIR
+ * Appends one row to work.jc_results (test_name, status, sas_rc, message).
+ * sas_rc holds &SYSCC as it stood right after script.sas was included
+ * (missing when the bundle has no script.sas). */
+%macro jc_run_one(dir);
+  %local tdir validate_present v_status v_message ran_rc;
+  %let tdir = &JC_TESTS_DIR/&dir;
+  %let ran_rc = .;
+  %let v_status = ;
+  %let v_message = ;
+
+  /* Confirm script.sas exists */
+  %if %sysfunc(fileexist(&tdir/script.sas)) = 0 %then %do;
+    %put WARNING: &dir has no script.sas — skipping;
+    data work._one;
+      length test_name $64 status $32 sas_rc 8 message $256;
+      test_name = "&dir"; status = "missing_script"; sas_rc = .;
+      message = "no script.sas in bundle";
+    run;
+    proc append base=work.jc_results data=work._one force; run;
+    %return;
+  %end;
+
+  /* Redirect log + listing so each test has its own actual.{log,lst} */
+  proc printto log="&tdir/actual.log"
+               print="&tdir/actual.lst"
+               new;
+  run;
+
+  /* Reset the condition code before the include so we see the test's own
+   * status. SYSERR is read-only (assigning it is itself an error), so the
+   * writable SYSCC is used instead. */
+  %let syscc = 0;
+  %include "&tdir/script.sas" / nosource2;
+  %let ran_rc = &syscc;
+
+  /* Validator — optional */
+  %let validate_present = %sysfunc(fileexist(&tdir/validate.sas));
+  %if &validate_present %then %do;
+    /* Clear any prior result */
+    proc datasets lib=work nolist;
+      delete jc_validation / memtype=data;
+    quit;
+    %include "&tdir/validate.sas" / nosource2;
+    %if %sysfunc(exist(work.jc_validation)) %then %do;
+      data _null_;
+        set work.jc_validation(obs=1);
+        call symputx('v_status', status, 'L');
+        call symputx('v_message', message, 'L');
+      run;
+    %end;
+    %else %do;
+      %let v_status = no_validation_output;
+      %let v_message = validate.sas ran but did not produce work.jc_validation;
+    %end;
+  %end;
+  %else %do;
+    %let v_status = no_validator;
+    %let v_message = no validate.sas in bundle;
+  %end;
+
+  /* Restore default destinations before we touch work.jc_results */
+  proc printto; run;
+
+  data work._one;
+    length test_name $64 status $32 sas_rc 8 message $256;
+    test_name = "&dir";
+    /* SYMGET reads the values at run time, so quotes or macro triggers
+     * inside a validator message cannot break this step's syntax */
+    status = symget('v_status');
+    sas_rc = &ran_rc;
+    message = symget('v_message');
+  run;
+  proc append base=work.jc_results data=work._one force; run;
+%mend jc_run_one;
+
+/* ---------- 3. Initialize result table and iterate ------------------ */
+data work.jc_results;
+  length test_name $64 status $32 sas_rc 8 message $256;
+  stop;
+run;
+
+/* %nrstr defers each macro call until this DATA step has finished */
+data _null_;
+  set work.jc_tests;
+  call execute('%nrstr(%jc_run_one('||strip(test_name)||'));');
+run;
+
+/* ---------- 4. 
Emit report ----------------------------------------- */ +proc export data=work.jc_results + outfile="&JC_TESTS_DIR/jenner_check_report.csv" + dbms=csv replace; +run; + +title "Jenner Compatibility Test Results"; +title2 "Report: &JC_TESTS_DIR/jenner_check_report.csv"; +proc print data=work.jc_results noobs; + var test_name status sas_rc message; +run; + +data _null_; + /* Tally statuses and print a summary to the log. The loop is driven by nobs= (set at compile time) */ + /* so the summary still prints, with all counts zero, when work.jc_results has no rows. */ + pass = 0; fail = 0; other = 0; + do i = 1 to n_results; + set work.jc_results nobs=n_results; + select (status); + when ('pass') pass = pass + 1; + when ('fail') fail = fail + 1; + otherwise other = other + 1; + end; + end; + put "NOTE: ============================================"; + put "NOTE: Jenner compatibility: pass=" pass " fail=" fail " other=" other; + put "NOTE: Full report at &JC_TESTS_DIR/jenner_check_report.csv"; + put "NOTE: Please attach that CSV to the PR comment."; + put "NOTE: ============================================"; + stop; +run; +title; +title2; diff --git a/jenner-check/t001_superfund_analyses/autoexec.sas b/jenner-check/t001_superfund_analyses/autoexec.sas new file mode 100644 index 0000000..2052e87 --- /dev/null +++ b/jenner-check/t001_superfund_analyses/autoexec.sas @@ -0,0 +1 @@ +options obs=100; diff --git a/jenner-check/t001_superfund_analyses/expected.json b/jenner-check/t001_superfund_analyses/expected.json new file mode 100644 index 0000000..ef4b5fa --- /dev/null +++ b/jenner-check/t001_superfund_analyses/expected.json @@ -0,0 +1,21 @@ +{ + "_captured_at": "2026-06-17T18:07:27Z", + "_captured_run_id": "r_019ed6c3b0037b70bbad860a8229a7e2", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "NOTE: Read 20 rows from DATALINES.", + "NOTE: Wrote Superfund_data14 (12 rows, 23 columns).", + "NOTE: Wrote One (20 rows, 46 columns).", + "NOTE: Wrote Year_One_Map (7 rows, 39 columns)." 
+ ], + "log_does_not_contain": [ + "ERROR:", + "[JENNER-ERROR", + "WARNING: Data in" + ], + "diagnostics": { + "parse_warnings": [], + "runtime_warnings": [] + } +} \ No newline at end of file diff --git a/jenner-check/t001_superfund_analyses/expected/files.md b/jenner-check/t001_superfund_analyses/expected/files.md new file mode 100644 index 0000000..5ecafcf --- /dev/null +++ b/jenner-check/t001_superfund_analyses/expected/files.md @@ -0,0 +1,70 @@ +These URLs are tied to a specific Jenner run (`r_019ed6c3b0037b70bbad860a8229a7e2`) and expire when that run is reaped — re-running the bundle regenerates them. + + +## Files + +| name | content_type | size_bytes | url | +|---|---|---|---| +| listing.txt | text/plain | 5784 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/listing.txt?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_contaminant_name.png | image/png | 55702 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_contaminant_name.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_contaminant_name.svg | image/svg+xml | 17064 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_contaminant_name.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_epa_id.png | image/png | 34763 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_epa_id.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_epa_id.svg | image/svg+xml | 11774 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_epa_id.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_fiscal_year.png | image/png | 43005 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_fiscal_year.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_fiscal_year.svg | image/svg+xml | 21691 | 
https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_fiscal_year.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year.png | image/png | 16607 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year.svg | image/svg+xml | 14281 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_five.png | image/png | 18079 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_five.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_five.svg | image/svg+xml | 11113 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_five.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_four.png | image/png | 14625 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_four.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_four.svg | image/svg+xml | 7836 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_four.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_one.png | image/png | 15744 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_one.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_one.svg | image/svg+xml | 10206 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_one.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_seven.png | image/png | 16769 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_seven.png?token=864cb999a0164899af215f7f398ce032 | 
+| ods_output/freq_year_seven.svg | image/svg+xml | 10984 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_seven.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_six.png | image/png | 18139 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_six.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_six.svg | image/svg+xml | 11111 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_six.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_three.png | image/png | 17669 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_three.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_three.svg | image/svg+xml | 11126 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_three.svg?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_two.png | image/png | 18959 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_two.png?token=864cb999a0164899af215f7f398ce032 | +| ods_output/freq_year_two.svg | image/svg+xml | 11109 | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/files/ods_output/freq_year_two.svg?token=864cb999a0164899af215f7f398ce032 | + +## Datasets + +| name | rows | columns | preview_url | +|---|---|---|---| +| a | 11 | zip_code, ARSENIC, num_ARSENIC | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/a?token=864cb999a0164899af215f7f398ce032 | +| b | 7 | zip_code, BENZENE, num_BENZENE | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/b?token=864cb999a0164899af215f7f398ce032 | +| c | 7 | zip_code, BENZO_B_FLUORANTHENE, num_BENZO_B_FLUORANTHENE | 
https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/c?token=864cb999a0164899af215f7f398ce032 | +| d | 7 | zip_code, BENZO_A_PYRENE, num_BENZO_A_PYRENE | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/d?token=864cb999a0164899af215f7f398ce032 | +| e | 8 | zip_code, CADMIUM, num_CADMIUM | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/e?token=864cb999a0164899af215f7f398ce032 | +| f | 7 | zip_code, CVC, num_CVC | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/f?token=864cb999a0164899af215f7f398ce032 | +| g | 7 | zip_code, CHLOROFORM, num_CHLOROFORM | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/g?token=864cb999a0164899af215f7f398ce032 | +| h | 9 | zip_code, LEAD, num_LEAD | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/h?token=864cb999a0164899af215f7f398ce032 | +| i | 8 | zip_code, MERCURY, num_MERCURY | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/i?token=864cb999a0164899af215f7f398ce032 | +| j | 7 | zip_code, PCBs, num_PCBs | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/j?token=864cb999a0164899af215f7f398ce032 | +| k | 8 | zip_code, PAHS, num_PAHS | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/k?token=864cb999a0164899af215f7f398ce032 | +| l | 12 | zip_code, ARSENIC, num_ARSENIC, BENZENE, num_BENZENE, BENZO_B_FLUORANTHENE, n... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/l?token=864cb999a0164899af215f7f398ce032 | +| one | 20 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, Year, ARSENIC, BENZENE, BENZ... 
| https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/one?token=864cb999a0164899af215f7f398ce032 | +| superfund_data | 20 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data?token=864cb999a0164899af215f7f398ce032 | +| superfund_data10 | 9 | zip_code, LEAD, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data10?token=864cb999a0164899af215f7f398ce032 | +| superfund_data11 | 8 | zip_code, MERCURY, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data11?token=864cb999a0164899af215f7f398ce032 | +| superfund_data12 | 7 | zip_code, PCBs, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data12?token=864cb999a0164899af215f7f398ce032 | +| superfund_data13 | 8 | zip_code, PAHS, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data13?token=864cb999a0164899af215f7f398ce032 | +| superfund_data14 | 12 | zip_code, ARSENIC, num_ARSENIC, BENZENE, num_BENZENE, BENZO_B_FLUORANTHENE, n... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data14?token=864cb999a0164899af215f7f398ce032 | +| superfund_data15 | 20 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, Year, ARSENIC, BENZENE, BENZ... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data15?token=864cb999a0164899af215f7f398ce032 | +| superfund_data2 | 20 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, Year, ARSENIC, BENZENE, BENZ... 
| https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data2?token=864cb999a0164899af215f7f398ce032 | +| superfund_data3 | 11 | zip_code, ARSENIC, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data3?token=864cb999a0164899af215f7f398ce032 | +| superfund_data4 | 7 | zip_code, BENZENE, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data4?token=864cb999a0164899af215f7f398ce032 | +| superfund_data5 | 7 | zip_code, BENZO_B_FLUORANTHENE, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data5?token=864cb999a0164899af215f7f398ce032 | +| superfund_data6 | 7 | zip_code, BENZO_A_PYRENE, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data6?token=864cb999a0164899af215f7f398ce032 | +| superfund_data7 | 8 | zip_code, CADMIUM, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data7?token=864cb999a0164899af215f7f398ce032 | +| superfund_data8 | 7 | zip_code, CVC, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data8?token=864cb999a0164899af215f7f398ce032 | +| superfund_data9 | 7 | zip_code, CHLOROFORM, COUNT, PERCENT | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/superfund_data9?token=864cb999a0164899af215f7f398ce032 | +| year_five_map | 1 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_five_map?token=864cb999a0164899af215f7f398ce032 | +| year_four_map | 0 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... 
| https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_four_map?token=864cb999a0164899af215f7f398ce032 | +| year_one_map | 7 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_one_map?token=864cb999a0164899af215f7f398ce032 | +| year_seven_map | 5 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_seven_map?token=864cb999a0164899af215f7f398ce032 | +| year_six_map | 1 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_six_map?token=864cb999a0164899af215f7f398ce032 | +| year_three_map | 3 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_three_map?token=864cb999a0164899af215f7f398ce032 | +| year_two_map | 3 | EPA_ID, Contaminant_Name, zip_code, Fiscal_Year, ARSENIC, BENZENE, BENZO_B_FL... | https://api.jenneranalytics.com/v1/run/r_019ed6c3b0037b70bbad860a8229a7e2/datasets/year_two_map?token=864cb999a0164899af215f7f398ce032 | diff --git a/jenner-check/t001_superfund_analyses/expected/log.txt b/jenner-check/t001_superfund_analyses/expected/log.txt new file mode 100644 index 0000000..e968124 --- /dev/null +++ b/jenner-check/t001_superfund_analyses/expected/log.txt @@ -0,0 +1,260 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: DATA WORK.Superfund_Data + +NOTE: Processing inline DATALINES (20 lines) + +NOTE: Read 20 rows from DATALINES. +NOTE: Wrote WORK.Superfund_Data (20 rows, 4 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: ODS plot written: freq_fiscal_year.spec.json +NOTE: PROC FREQ statement used. +NOTE: PROC FREQ +NOTE: ODS plot written: freq_contaminant_name.spec.json +NOTE: PROC FREQ statement used. +NOTE: PROC FREQ +NOTE: ODS plot written: freq_epa_id.spec.json +NOTE: PROC FREQ statement used. +NOTE: DATA superfund_data2 + + +NOTE: Read 20 rows from superfund_data. +NOTE: Wrote superfund_data2 (20 rows, 16 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data3 has 11 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA A + + +NOTE: Read 11 rows from superfund_data3. +NOTE: Wrote A (11 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data4 has 7 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA B + + +NOTE: Read 7 rows from superfund_data4. +NOTE: Wrote B (7 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data5 has 7 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA C + + +NOTE: Read 7 rows from superfund_data5. +NOTE: Wrote C (7 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data6 has 7 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA D + + +NOTE: Read 7 rows from superfund_data6. +NOTE: Wrote D (7 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data7 has 8 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA E + + +NOTE: Read 8 rows from superfund_data7. +NOTE: Wrote E (8 rows, 3 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data8 has 7 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA F + + +NOTE: Read 7 rows from superfund_data8. +NOTE: Wrote F (7 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data9 has 7 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA G + + +NOTE: Read 7 rows from superfund_data9. +NOTE: Wrote G (7 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data10 has 9 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA H + + +NOTE: Read 9 rows from superfund_data10. +NOTE: Wrote H (9 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data11 has 8 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA I + + +NOTE: Read 8 rows from superfund_data11. +NOTE: Wrote I (8 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data12 has 7 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA J + + +NOTE: Read 7 rows from superfund_data12. +NOTE: Wrote J (7 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: Output dataset superfund_data13 has 8 observations and 4 variables. +NOTE: PROC FREQ statement used. +NOTE: DATA K + + +NOTE: Read 8 rows from superfund_data13. +NOTE: Wrote K (8 rows, 3 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Superfund_data14 + +NOTE: Stream 1 processed 11 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 2 processed 7 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 3 processed 7 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 4 processed 7 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 5 processed 8 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 6 processed 7 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 7 processed 7 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 8 processed 9 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 9 processed 8 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 10 processed 7 rows, max BY-group size: 2 (O(1) memory verified) +NOTE: Stream 11 processed 8 rows, max BY-group size: 2 (O(1) memory verified) + +NOTE: Wrote Superfund_data14 (12 rows, 23 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA L + + +NOTE: Read 12 rows from Superfund_data14. +NOTE: Wrote L (12 rows, 35 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA superfund_data15 + +NOTE: Stream 1 processed 20 rows, max BY-group size: 4 (O(1) memory verified) +NOTE: Stream 2 processed 12 rows, max BY-group size: 2 (O(1) memory verified) + +NOTE: Wrote superfund_data15 (20 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA One + + +NOTE: Read 20 rows from superfund_data15. +NOTE: Wrote One (20 rows, 46 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: PROC FREQ +NOTE: ODS plot written: freq_year.spec.json +NOTE: ODS plot written: freq_year_one.spec.json +NOTE: ODS plot written: freq_year_two.spec.json +NOTE: ODS plot written: freq_year_three.spec.json +NOTE: ODS plot written: freq_year_four.spec.json +NOTE: ODS plot written: freq_year_five.spec.json +NOTE: ODS plot written: freq_year_six.spec.json +NOTE: ODS plot written: freq_year_seven.spec.json +NOTE: PROC FREQ statement used. +NOTE: DATA Year_One_Map + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_One_Map (7 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Year_Two_Map + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_Two_Map (3 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Year_Three_Map + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_Three_Map (3 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Year_Four_MAp + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_Four_MAp (0 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Year_Five_Map + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_Five_Map (1 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Year_Six_Map + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_Six_Map (1 rows, 39 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Year_Seven_Map + + +NOTE: Read 20 rows from One. +NOTE: Wrote Year_Seven_Map (5 rows, 39 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds diff --git a/jenner-check/t001_superfund_analyses/expected/output.txt b/jenner-check/t001_superfund_analyses/expected/output.txt new file mode 100644 index 0000000..6aaebe8 --- /dev/null +++ b/jenner-check/t001_superfund_analyses/expected/output.txt @@ -0,0 +1,98 @@ + The FREQ Procedure + + Cumulative Cumulative +Fiscal_Year Frequency Percent Frequency Percent +-------------------------------------------------------------------- +1983 2 10.00 2 10.00 +1984 2 10.00 4 20.00 +1985 1 5.00 5 25.00 +1986 2 10.00 7 35.00 +1990 2 10.00 9 45.00 +1991 1 5.00 10 50.00 +1995 1 5.00 11 55.00 +1996 2 10.00 13 65.00 +2005 1 5.00 14 70.00 +2009 1 5.00 15 75.00 +2013 1 5.00 16 80.00 +2014 1 5.00 17 85.00 +2015 1 5.00 18 90.00 +2016 2 10.00 20 100.00 + The FREQ Procedure + + Cumulative Cumulative +Contaminant_Name Frequency Percent Frequency Percent +------------------------------------------------------------------------------------------------ +ARSENIC 5 25.00 5 25.00 +BENZENE 1 5.00 6 30.00 +BENZO(B)FLUORANTHENE 1 5.00 7 35.00 +BENZO[A]PYRENE 1 5.00 8 40.00 +CADMIUM 2 10.00 10 50.00 +CHLOROETHENE (VINYL CHLORIDE) 1 5.00 11 55.00 +CHLOROFORM 1 5.00 12 60.00 +LEAD 3 15.00 15 75.00 +MERCURY 2 10.00 17 85.00 +POLYCHLORINATED BIPHENYLS (PCBs) 1 5.00 18 90.00 +POLYCYCLIC AROMATIC HYDROCARBONS (PAHS) 2 10.00 20 100.00 + The FREQ Procedure + + Cumulative Cumulative +EPA_ID Frequency Percent Frequency Percent +--------------------------------------------------------------------- +AZ7570028582 3 15.00 3 15.00 +CAD009106527 4 20.00 7 35.00 +FLD980602767 4 20.00 11 55.00 +ILD000805812 2 10.00 13 65.00 +NYD000511659 3 15.00 16 80.00 +TXD980748159 4 20.00 20 100.00 + The FREQ Procedure + + Cumulative Cumulative +Year Frequency Percent Frequency Percent +------------------------------------------------------------- +1 7 35.00 7 35.00 +2 3 15.00 10 50.00 +3 3 15.00 13 65.00 +5 1 5.00 14 70.00 +6 1 5.00 15 75.00 +7 5 25.00 20 100.00 + + 
Cumulative Cumulative +Year_One Frequency Percent Frequency Percent +----------------------------------------------------------------- +0 13 65.00 13 65.00 +1 7 35.00 20 100.00 + + Cumulative Cumulative +Year_Two Frequency Percent Frequency Percent +----------------------------------------------------------------- +0 17 85.00 17 85.00 +1 3 15.00 20 100.00 + + Cumulative Cumulative +Year_Three Frequency Percent Frequency Percent +------------------------------------------------------------------- +0 17 85.00 17 85.00 +1 3 15.00 20 100.00 + + Cumulative Cumulative +Year_Four Frequency Percent Frequency Percent +------------------------------------------------------------------ +0 20 100.00 20 100.00 + + Cumulative Cumulative +Year_Five Frequency Percent Frequency Percent +------------------------------------------------------------------ +0 19 95.00 19 95.00 +1 1 5.00 20 100.00 + + Cumulative Cumulative +Year_Six Frequency Percent Frequency Percent +----------------------------------------------------------------- +0 19 95.00 19 95.00 +1 1 5.00 20 100.00 + + Cumulative Cumulative +Year_Seven Frequency Percent Frequency Percent +------------------------------------------------------------------- +0 15 75.00 15 75.00 +1 5 25.00 20 100.00 diff --git a/jenner-check/t001_superfund_analyses/input/superfund_final_dataset.csv b/jenner-check/t001_superfund_analyses/input/superfund_final_dataset.csv new file mode 100644 index 0000000..dd59112 --- /dev/null +++ b/jenner-check/t001_superfund_analyses/input/superfund_final_dataset.csv @@ -0,0 +1,21 @@ +EPA_ID,zip_code,Fiscal_Year,Contaminant_Name +NYD000511659,10453,1990,CHLOROFORM +NYD000511659,10453,1990,ARSENIC +NYD000511659,10453,2005,POLYCYCLIC AROMATIC HYDROCARBONS (PAHS) +FLD980602767,33602,1984,ARSENIC +FLD980602767,33602,1984,MERCURY +FLD980602767,33602,2014,LEAD +FLD980602767,33602,2015,CADMIUM +ILD000805812,60601,1991,CADMIUM +ILD000805812,60601,2013,ARSENIC +TXD980748159,77001,1983,POLYCYCLIC AROMATIC HYDROCARBONS 
(PAHS) +TXD980748159,77001,1983,ARSENIC +TXD980748159,77001,1995,LEAD +TXD980748159,77001,2009,MERCURY +AZ7570028582,85224,1996,ARSENIC +AZ7570028582,85224,1996,LEAD +AZ7570028582,85224,1985,BENZENE +CAD009106527,90001,1986,BENZO(B)FLUORANTHENE +CAD009106527,90001,1986,BENZO[A]PYRENE +CAD009106527,90001,2016,CHLOROETHENE (VINYL CHLORIDE) +CAD009106527,90001,2016,POLYCHLORINATED BIPHENYLS (PCBs) diff --git a/jenner-check/t001_superfund_analyses/meta.json b/jenner-check/t001_superfund_analyses/meta.json new file mode 100644 index 0000000..d7cee1c --- /dev/null +++ b/jenner-check/t001_superfund_analyses/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t001_superfund_analyses", + "source_file": "Superfund_Analyses_v2.sas", + "source_blob_sha": "425f0e60bf38d55f9014bcb0caa7f4dab2f0b0a0", + "source_commit": "2202011eef6e7a99d22c7ea97efd0bc044c2f0bd", + "tier": "real_data", + "notes": "EPA Superfund contamination analysis. PROC IMPORT of the author's local CSV replaced with an inline DATA step loading a 20-row sample of the same columns (EPA_ID, zip_code, Fiscal_Year, Contaminant_Name), pre-sorted by zip_code so the by-group PROC FREQ steps run; trailing PROC EXPORT-to-XLSX blocks (local disk) omitted. All FREQ, divide()-based percentages, merges, and year-category maps unchanged." 
+} \ No newline at end of file diff --git a/jenner-check/t001_superfund_analyses/script.sas b/jenner-check/t001_superfund_analyses/script.sas new file mode 100644 index 0000000..5201c83 --- /dev/null +++ b/jenner-check/t001_superfund_analyses/script.sas @@ -0,0 +1,330 @@ +/* +Version Author Start Date Last Update +--- --------------- ------------ --------------- +1.0 Linh Duong 02/27/2020 02/27/2020 + +Jenner-check note: the only adaptations to the upstream Superfund_Analyses_v2.sas +are (a) the PROC IMPORT of the author's local Superfund_final_dataset.csv is +replaced with an inline DATA step that loads a small sample of the same columns +(EPA_ID, zip_code, Fiscal_Year, Contaminant_Name) so the bundle is self-contained, +and (b) the trailing PROC EXPORT-to-XLSX blocks (which wrote to the author's local +disk) are omitted. All FREQ tabulations, the contaminant-flag DATA step using +divide(), the by-zip PROC FREQ out= summaries, the merges, and the year-category +maps are unchanged. The same sample is also provided as ./input/superfund_final_dataset.csv. 
+*/ + +data WORK.Superfund_Data; + length EPA_ID $ 14 Contaminant_Name $ 40; + input EPA_ID $ zip_code Fiscal_Year Contaminant_Name & $40.; + datalines; +NYD000511659 10453 1990 CHLOROFORM +NYD000511659 10453 1990 ARSENIC +NYD000511659 10453 2005 POLYCYCLIC AROMATIC HYDROCARBONS (PAHS) +FLD980602767 33602 1984 ARSENIC +FLD980602767 33602 1984 MERCURY +FLD980602767 33602 2014 LEAD +FLD980602767 33602 2015 CADMIUM +ILD000805812 60601 1991 CADMIUM +ILD000805812 60601 2013 ARSENIC +TXD980748159 77001 1983 POLYCYCLIC AROMATIC HYDROCARBONS (PAHS) +TXD980748159 77001 1983 ARSENIC +TXD980748159 77001 1995 LEAD +TXD980748159 77001 2009 MERCURY +AZ7570028582 85224 1996 ARSENIC +AZ7570028582 85224 1996 LEAD +AZ7570028582 85224 1985 BENZENE +CAD009106527 90001 1986 BENZO(B)FLUORANTHENE +CAD009106527 90001 1986 BENZO[A]PYRENE +CAD009106527 90001 2016 CHLOROETHENE (VINYL CHLORIDE) +CAD009106527 90001 2016 POLYCHLORINATED BIPHENYLS (PCBs) +; +run; + +Proc freq data = superfund_data; +tables Fiscal_Year; +run; + +Proc freq data = superfund_data; +tables Contaminant_Name; +run; + +Proc freq data = superfund_data; +tables EPA_ID; +run; + +Data superfund_data2; +Set superfund_data; +if 1982 <= Fiscal_Year <= 1987 then Year=1; +else if 1988 <= Fiscal_Year <= 1992 then Year=2; +else if 1993 <= Fiscal_Year <= 1997 then Year=3; +else if 1988 <= Fiscal_Year <= 2002 then Year=4; +else if 2003 <= Fiscal_Year <= 2007 then Year=5; +else if 2008 <= Fiscal_Year <= 2012 then Year=6; +else if Fiscal_Year >= 2013 then Year=7; +if Contaminant_Name = "ARSENIC" then ARSENIC = 1; +else ARSENIC = 0; +if Contaminant_Name = "BENZENE" then BENZENE = 1; +else BENZENE = 0; +if Contaminant_Name = "BENZO(B)FLUORANTHENE" then BENZO_B_FLUORANTHENE = 1; +else BENZO_B_FLUORANTHENE = 0; +if Contaminant_Name = "BENZO[A]PYRENE" then BENZO_A_PYRENE = 1; +else BENZO_A_PYRENE = 0; +if Contaminant_Name = "CADMIUM" then CADMIUM = 1; +else CADMIUM = 0; +if Contaminant_Name = "CHLOROETHENE (VINYL CHLORIDE)" then CVC = 1; 
+else CVC = 0; +if Contaminant_Name = "CHLOROFORM" then CHLOROFORM = 1; +else CHLOROFORM = 0; +if Contaminant_Name = "LEAD" then LEAD = 1; +else LEAD = 0; +if Contaminant_Name = "MERCURY" then MERCURY = 1; +else MERCURY = 0; +if Contaminant_Name = "POLYCHLORINATED BIPHENYLS (PCBs)" then PCBs = 1; +else PCBs = 0; +if Contaminant_Name = "POLYCYCLIC AROMATIC HYDROCARBONS (PAHS)" then PAHS = 1; +else PAHS = 0; +run; + +proc freq data = superfund_data2 noprint; +tables ARSENIC / out= superfund_data3; +by zip_code; +run; + +*Re-label Variables; +Data A; +Set superfund_data3; +rename count = num_ARSENIC; +label count = "num_ARSENIC"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables BENZENE / out= superfund_data4; +by zip_code; +run; + +*Re-label Variables; +Data B; +Set superfund_data4; +rename count = num_BENZENE; +label count = "num_BENZENE"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables BENZO_B_FLUORANTHENE / out= superfund_data5; +by zip_code; +run; + +*Re-label Variables; +Data C; +Set superfund_data5; +rename count = num_BENZO_B_FLUORANTHENE; +label count = "num_BENZO_B_FLUORANTHENE"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables BENZO_A_PYRENE / out= superfund_data6; +by zip_code; +run; + +*Re-label Variables; +Data D; +Set superfund_data6; +rename count = num_BENZO_A_PYRENE; +label count = "num_BENZO_A_PYRENE"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables CADMIUM / out= superfund_data7; +by zip_code; +run; + +*Re-label Variables; +Data E; +Set superfund_data7; +rename count = num_CADMIUM; +label count = "num_CADMIUM"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables CVC / out= superfund_data8; +by zip_code; +run; + +*Re-label Variables; +Data F; +Set superfund_data8; +rename count = num_CVC; +label count = "num_CVC"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables CHLOROFORM / out= superfund_data9; +by 
zip_code; +run; + +*Re-label Variables; +Data G; +Set superfund_data9; +rename count = num_CHLOROFORM; +label count = "num_CHLOROFORM"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables LEAD / out= superfund_data10; +by zip_code; +run; + +*Re-label Variables; +Data H; +Set superfund_data10; +rename count = num_LEAD; +label count = "num_LEAD"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables MERCURY / out= superfund_data11; +by zip_code; +run; + +*Re-label Variables; +Data I; +Set superfund_data11; +rename count = num_MERCURY; +label count = "num_MERCURY"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables PCBs / out= superfund_data12; +by zip_code; +run; + +*Re-label Variables; +Data J; +Set superfund_data12; +rename count = num_PCBs; +label count = "num_PCBs"; +drop percent; +run; + +proc freq data = superfund_data2 noprint; +tables PAHS / out= superfund_data13; +by zip_code; +run; + +*Re-label Variables; +Data K; +Set superfund_data13; +rename count = num_PAHS; +label count = "num_PAHS"; +drop percent; +run; + +*Merge Chemicals into One Dataset; + +*NOTE(review): each of A-K can hold two rows per zip_code (flag 0 and flag 1), so this is a many-to-many match-merge and the num_ counts include flag 0 rows - confirm this is intended; +Data Superfund_data14; +Merge A B C D E F G H I J K; +by zip_code; +run; + +*Create percentages for Chemicals; +*Total_Chem is built with plain addition rather than the SUM function, so it is missing whenever any num_ count is missing; +Data L; +Set Superfund_data14; +Total_Chem = num_ARSENIC + num_BENZENE + num_BENZO_B_FLUORANTHENE + num_BENZO_A_PYRENE + num_CADMIUM + num_CVC + num_CHLOROFORM + num_LEAD + num_MERCURY + num_PCBs + num_PAHS; +Pct_ARSENIC = (divide(num_ARSENIC,Total_Chem))*100; +Pct_BENZENE = (divide(num_BENZENE,Total_Chem))*100; +Pct_BENZO_B_FLUORANTHENE = (divide(num_BENZO_B_FLUORANTHENE,Total_Chem))*100; +Pct_BENZO_A_PYRENE = (divide(num_BENZO_A_PYRENE,Total_Chem))*100; +Pct_CADMIUM = (divide(num_CADMIUM,Total_Chem))*100; +Pct_CVC = (divide(num_CVC,Total_Chem))*100; +Pct_CHLOROFORM = (divide(num_CHLOROFORM,Total_Chem))*100; +Pct_LEAD = (divide(num_LEAD,Total_Chem))*100; +Pct_MERCURY = (divide(num_MERCURY,Total_Chem))*100; +Pct_PCBs = 
(divide(num_PCBs,Total_Chem))*100; +Pct_PAHS = (divide(num_PAHS,Total_Chem))*100; +run; + +*Join the per-zip_code counts and percentages in L back onto the row-level data; +Data superfund_data15; +Merge superfund_data2 L; +by zip_code; +run; + +*One 0/1 indicator per Year value - a missing Year gives 0 in all seven; +Data One; +Set superfund_data15; +if Year = 1 then Year_One = 1; +else Year_One = 0; +if Year = 2 then Year_Two = 1; +else Year_Two = 0; +if Year = 3 then Year_Three = 1; +else Year_Three = 0; +if Year = 4 then Year_Four = 1; +else Year_Four = 0; +if Year = 5 then Year_Five = 1; +else Year_Five = 0; +if Year = 6 then Year_Six = 1; +else Year_Six = 0; +if Year = 7 then Year_Seven = 1; +else Year_Seven = 0; +run; + +*Frequency check of Year against its seven indicators; +proc freq data = One; +tables Year Year_One Year_Two Year_Three Year_Four Year_Five Year_Six Year_Seven; +run; + +*Each map dataset below keeps the rows of One for a single Year band and drops Year plus the other six indicators; +*Year_One Map; + +Data Year_One_Map; +Set One; +where Year_One = 1; +drop Year Year_Two Year_Three Year_Four Year_Five Year_Six Year_Seven; +run; + +*Year_Two Map; +Data Year_Two_Map; +Set One; +where Year_Two = 1; +drop Year Year_One Year_Three Year_Four Year_Five Year_Six Year_Seven; +run; + +*Year_Three Map; +Data Year_Three_Map; +Set One; +where Year_Three = 1; +drop Year Year_One Year_Two Year_Four Year_Five Year_Six Year_Seven; +run; + +*Year_Four Map; +Data Year_Four_MAp; +Set One; +where Year_Four = 1; +drop Year Year_One Year_Two Year_Three Year_Five Year_Six Year_Seven; +run; + +*Year_Five Map; +Data Year_Five_Map; +Set One; +where Year_Five = 1; +drop Year Year_One Year_Two Year_Three Year_Four Year_Six Year_Seven; +run; + +*Year_Six Map; +Data Year_Six_Map; +Set One; +where Year_Six = 1; +drop Year Year_One Year_Two Year_Three Year_Four Year_Five Year_Seven; +run; + +*Year_Seven Map; +Data Year_Seven_Map; +Set One; +where Year_Seven = 1; +drop Year Year_One Year_Two Year_Three Year_Four Year_Five Year_Six; +run; diff --git a/jenner-check/t002_acs_add_pct/autoexec.sas b/jenner-check/t002_acs_add_pct/autoexec.sas new file mode 100644 index 0000000..2052e87 --- /dev/null +++ b/jenner-check/t002_acs_add_pct/autoexec.sas @@ -0,0 +1 @@ +options obs=100; diff --git 
a/jenner-check/t002_acs_add_pct/expected.json b/jenner-check/t002_acs_add_pct/expected.json new file mode 100644 index 0000000..e90b688 --- /dev/null +++ b/jenner-check/t002_acs_add_pct/expected.json @@ -0,0 +1,20 @@ +{ + "_captured_at": "2026-06-17T18:11:59Z", + "_captured_run_id": "r_019ed6c7f1bf71629601a2b75a0eb042", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "NOTE: Read 5 rows from DATALINES.", + "NOTE: Wrote ACS_Data3 (5 rows, 39 columns).", + "NOTE: DATA ACS_Data2" + ], + "log_does_not_contain": [ + "ERROR:", + "[JENNER-ERROR", + "WARNING: Data in" + ], + "diagnostics": { + "parse_warnings": [], + "runtime_warnings": [] + } +} \ No newline at end of file diff --git a/jenner-check/t002_acs_add_pct/expected/files.md b/jenner-check/t002_acs_add_pct/expected/files.md new file mode 100644 index 0000000..e820f94 --- /dev/null +++ b/jenner-check/t002_acs_add_pct/expected/files.md @@ -0,0 +1,10 @@ +These URLs are tied to a specific Jenner run (`r_019ed6c7f1bf71629601a2b75a0eb042`) and expire when that run is reaped — re-running the bundle regenerates them. + + +## Datasets + +| name | rows | columns | preview_url | +|---|---|---|---| +| acs_data | 5 | GEO_display_label, Male, Female, White, Black, American_Indian_Alaska_Native,... | https://api.jenneranalytics.com/v1/run/r_019ed6c7f1bf71629601a2b75a0eb042/datasets/acs_data?token=8dc6b562316f4fd08408b6ae78fcf13b | +| acs_data2 | 5 | GEO_display_label, Male, Female, White, Black, AI_AN, Asian, Nat_Haw_Pac_Isla... | https://api.jenneranalytics.com/v1/run/r_019ed6c7f1bf71629601a2b75a0eb042/datasets/acs_data2?token=8dc6b562316f4fd08408b6ae78fcf13b | +| acs_data3 | 5 | GEO_display_label, Male, Female, White, Black, AI_AN, Asian, Nat_Haw_Pac_Isla... 
| https://api.jenneranalytics.com/v1/run/r_019ed6c7f1bf71629601a2b75a0eb042/datasets/acs_data3?token=8dc6b562316f4fd08408b6ae78fcf13b | diff --git a/jenner-check/t002_acs_add_pct/expected/log.txt b/jenner-check/t002_acs_add_pct/expected/log.txt new file mode 100644 index 0000000..33caeed --- /dev/null +++ b/jenner-check/t002_acs_add_pct/expected/log.txt @@ -0,0 +1,29 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: DATA WORK.ACS_Data + +NOTE: Processing inline DATALINES (5 lines) + +NOTE: Read 5 rows from DATALINES. +NOTE: Wrote WORK.ACS_Data (5 rows, 18 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA ACS_Data2 + + +NOTE: Read 5 rows from ACS_Data. +NOTE: Wrote ACS_Data2 (5 rows, 18 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA ACS_Data3 + + +NOTE: Read 5 rows from ACS_Data2. +NOTE: Wrote ACS_Data3 (5 rows, 39 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds diff --git a/jenner-check/t002_acs_add_pct/expected/output.txt b/jenner-check/t002_acs_add_pct/expected/output.txt new file mode 100644 index 0000000..e69de29 diff --git a/jenner-check/t002_acs_add_pct/input/ACS_Data.csv b/jenner-check/t002_acs_add_pct/input/ACS_Data.csv new file mode 100644 index 0000000..8df3579 --- /dev/null +++ b/jenner-check/t002_acs_add_pct/input/ACS_Data.csv @@ -0,0 +1,6 @@ +GEO_display_label,Male,Female,White,Black,American_Indian_Alaska_Native,Asian,Native_Hawaiian_Pacific_Islander,Other_Race,Two_Or_More_Races,Hispanic_or_Latino,Not_Hispanic_or_Latino,drove_car_or_truck_or_van,carpool_car_or_truck_or_van,public_transport_exclude_taxi,walked,Taxi_motorcycle_bicycle_other,worked_at_home +ZCTA5 00601,8809,8790,661,120,17,0,0,3641,135,66,17533,3048,124,0,209,42,54 +ZCTA5 00602,19898,21587,12085,1098,84,36,12,18430,540,4116,37369,7821,312,233,1244,198,410 +ZCTA5 00603,25286,28522,22356,2456,211,188,0,23980,957,8512,45296,12044,766,540,2188,410,622 +ZCTA5 00606,3055,3203,4188,213,0,0,0,1622,235,1488,4770,1320,88,0,366,41,102 +ZCTA5 00610,13133,14313,14920,1322,142,33,0,9320,709,5230,22216,5908,412,188,1066,211,348 diff --git a/jenner-check/t002_acs_add_pct/meta.json b/jenner-check/t002_acs_add_pct/meta.json new file mode 100644 index 0000000..d8d38c9 --- /dev/null +++ b/jenner-check/t002_acs_add_pct/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t002_acs_add_pct", + "source_file": "ACS2013_2017_Data_Add_Pct_v2.sas", + "source_blob_sha": "74144db80fd6ac2914683545135c9cbd0e633dae", + "source_commit": "2202011eef6e7a99d22c7ea97efd0bc044c2f0bd", + "tier": "real_data", + "notes": "ACS census demographic percentages. PROC IMPORT of the author's local xlsx replaced with an inline DSD comma-delimited DATA step carrying the same pre-rename columns; trailing interactive 'dm dexport' to local disk omitted. 
The rename steps and all divide()-based percentage calculations (sex, race, ethnicity, transportation) are unchanged; computed Pct_Male/Pct_White match the values in the repo's own output CSV." +} \ No newline at end of file diff --git a/jenner-check/t002_acs_add_pct/script.sas b/jenner-check/t002_acs_add_pct/script.sas new file mode 100644 index 0000000..995dacd --- /dev/null +++ b/jenner-check/t002_acs_add_pct/script.sas @@ -0,0 +1,64 @@ +/* +Version Author Start Date Last Update +--- --------------- ------------ --------------- +1.0 Linh Duong 02/27/2020 02/27/2020 + +Jenner-check note: the only adaptations to the upstream ACS2013_2017_Data_Add_Pct_v2.sas +are (a) the PROC IMPORT of the author's local ACS2013_2017_Data.xlsx is replaced with +an inline DATA step that loads a small sample carrying the same pre-rename columns the +script consumes, so the bundle is self-contained, and (b) the trailing interactive +"dm dexport" command (which wrote a CSV to the author's local disk) is omitted. The +rename steps and the divide()-based percentage calculations are unchanged. The sample +is also provided as ./input/ACS_Data.csv. 
+*/ + +*Inline comma-delimited sample read with DSD - fields split on commas only, so the blank inside GEO_display_label is kept; +data WORK.ACS_Data; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ Male Female White Black + American_Indian_Alaska_Native Asian Native_Hawaiian_Pacific_Islander + Other_Race Two_Or_More_Races Hispanic_or_Latino Not_Hispanic_or_Latino + drove_car_or_truck_or_van carpool_car_or_truck_or_van + public_transport_exclude_taxi walked Taxi_motorcycle_bicycle_other + worked_at_home; + datalines; +ZCTA5 00601,8809,8790,661,120,17,0,0,3641,135,66,17533,3048,124,0,209,42,54 +ZCTA5 00602,19898,21587,12085,1098,84,36,12,18430,540,4116,37369,7821,312,233,1244,198,410 +ZCTA5 00603,25286,28522,22356,2456,211,188,0,23980,957,8512,45296,12044,766,540,2188,410,622 +ZCTA5 00606,3055,3203,4188,213,0,0,0,1622,235,1488,4770,1320,88,0,366,41,102 +ZCTA5 00610,13133,14313,14920,1322,142,33,0,9320,709,5230,22216,5908,412,188,1066,211,348 +; +run; + +*Shorten four long column names - presumably so the matching Pct_ names created below stay within the 32-character SAS name limit (TODO confirm); +Data ACS_Data2; +set ACS_Data; +rename American_Indian_Alaska_Native = AI_AN; +rename Native_Hawaiian_Pacific_Islander = Nat_Haw_Pac_Islan; +rename public_transport_exclude_taxi = public_transport_excl_taxi; +rename Taxi_motorcycle_bicycle_other = Taxi_motorcycle_bicyc_other; +run; + +*For each category (sex, race, ethnicity, transportation) sum the components into a total, then express each component as a percentage of that total; +Data ACS_Data3; +set ACS_Data2; +Total_Sex = Male + Female; +Pct_Male = (divide(Male,Total_Sex))*100; +Pct_Female = (divide(Female,Total_Sex))*100; +Total_Race = White + Black + AI_AN + Asian + Nat_Haw_Pac_Islan + Other_Race + Two_Or_More_Races; +Pct_White = (divide(White,Total_Race))*100; +Pct_Black = (divide(Black,Total_Race))*100; +Pct_AI_AN = (divide(AI_AN,Total_Race))*100; +Pct_Asian = (divide(Asian,Total_Race))*100; +Pct_Nat_Haw_Pac_Islan = (divide(Nat_Haw_Pac_Islan,Total_Race))*100; +Pct_Other_Race = (divide(Other_Race,Total_Race))*100; +Pct_Two_Or_More_Races = (divide(Two_Or_More_Races,Total_Race))*100; +Total_Ethnicity = Hispanic_or_Latino + Not_Hispanic_or_Latino; +Pct_Hispanic_or_Latino = (divide(Hispanic_or_Latino,Total_Ethnicity))*100; +Pct_Not_Hispanic_or_Latino = 
(divide(Not_Hispanic_or_Latino,Total_Ethnicity))*100; +*Type_of_transportation is the total across the six commute columns and the denominator for the percentages that follow; +Type_of_transportation = drove_car_or_truck_or_van + carpool_car_or_truck_or_van + public_transport_excl_taxi + walked + Taxi_motorcycle_bicyc_other + worked_at_home; +Pct_drove_car_or_truck_or_van = (divide(drove_car_or_truck_or_van,Type_of_transportation))*100; +Pct_carpool_car_or_truck_or_van = (divide(carpool_car_or_truck_or_van,Type_of_transportation))*100; +Pct_public_transport_excl_taxi = (divide(public_transport_excl_taxi,Type_of_transportation))*100; +Pct_walked = (divide(walked,Type_of_transportation))*100; +Pct_Taxi_motorcycle_bicyc_other = (divide(Taxi_motorcycle_bicyc_other,Type_of_transportation))*100; +Pct_worked_at_home = (divide(worked_at_home,Type_of_transportation))*100; +run; diff --git a/jenner-check/t003_clean_merge_acs/autoexec.sas b/jenner-check/t003_clean_merge_acs/autoexec.sas new file mode 100644 index 0000000..2052e87 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/autoexec.sas @@ -0,0 +1 @@ +options obs=100; diff --git a/jenner-check/t003_clean_merge_acs/expected.json b/jenner-check/t003_clean_merge_acs/expected.json new file mode 100644 index 0000000..1d042f8 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/expected.json @@ -0,0 +1,20 @@ +{ + "_captured_at": "2026-06-17T18:12:55Z", + "_captured_run_id": "r_019ed6c9446c73508ca19be36f2ace9d", + "status": "ok", + "exit_code": 0, + "log_contains": [ + "NOTE: Wrote ACS (3 rows, 23 columns).", + "NOTE: DATA OneC", + "NOTE: DATA EightC" + ], + "log_does_not_contain": [ + "ERROR:", + "[JENNER-ERROR", + "WARNING: Data in" + ], + "diagnostics": { + "parse_warnings": [], + "runtime_warnings": [] + } +} \ No newline at end of file diff --git a/jenner-check/t003_clean_merge_acs/expected/files.md b/jenner-check/t003_clean_merge_acs/expected/files.md new file mode 100644 index 0000000..f3c0b42 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/expected/files.md @@ -0,0 +1,32 @@ +These URLs are tied to a specific Jenner run 
(`r_019ed6c9446c73508ca19be36f2ace9d`) and expire when that run is reaped — re-running the bundle regenerates them. + + +## Datasets + +| name | rows | columns | preview_url | +|---|---|---|---| +| acs | 3 | Male, Female, GEO_display_label, White, Black, American_Indian_Alaska_Native,... | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/acs?token=41c78f3729404638bc8e637fe9abb3b2 | +| eight | 3 | GEO_display_label, HC04_EST_VC01 | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/eight?token=41c78f3729404638bc8e637fe9abb3b2 | +| eightb | 3 | HC04_EST_VC01, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/eightb?token=41c78f3729404638bc8e637fe9abb3b2 | +| eightc | 3 | unemployment_rate_16_older, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/eightc?token=41c78f3729404638bc8e637fe9abb3b2 | +| five | 3 | GEO_display_label, HC01_EST_VC01 | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/five?token=41c78f3729404638bc8e637fe9abb3b2 | +| fiveb | 3 | HC01_EST_VC01, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/fiveb?token=41c78f3729404638bc8e637fe9abb3b2 | +| fivec | 3 | total_pop, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/fivec?token=41c78f3729404638bc8e637fe9abb3b2 | +| four | 3 | GEO_display_label, HD01_VD10, HD01_VD18, HD01_VD26, HD01_VD34, HD01_VD42, HD0... | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/four?token=41c78f3729404638bc8e637fe9abb3b2 | +| fourb | 3 | HD01_VD10, HD01_VD18, HD01_VD26, HD01_VD34, HD01_VD42, HD01_VD50, GEO_display... 
| https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/fourb?token=41c78f3729404638bc8e637fe9abb3b2 | +| fourc | 3 | drove_car_or_truck_or_van, carpool_car_or_truck_or_van, public_transport_excl... | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/fourc?token=41c78f3729404638bc8e637fe9abb3b2 | +| one | 3 | GEO_display_label, HD01_VD02, HD01_VD26 | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/one?token=41c78f3729404638bc8e637fe9abb3b2 | +| oneb | 3 | HD01_VD02, HD01_VD26, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/oneb?token=41c78f3729404638bc8e637fe9abb3b2 | +| onec | 3 | Male, Female, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/onec?token=41c78f3729404638bc8e637fe9abb3b2 | +| seven | 3 | GEO_display_label, HC03_EST_VC01 | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/seven?token=41c78f3729404638bc8e637fe9abb3b2 | +| sevenb | 3 | HC03_EST_VC01, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/sevenb?token=41c78f3729404638bc8e637fe9abb3b2 | +| sevenc | 3 | percent_below_poverty_level, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/sevenc?token=41c78f3729404638bc8e637fe9abb3b2 | +| six | 3 | GEO_display_label, HC02_EST_VC17, HC02_EST_VC18 | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/six?token=41c78f3729404638bc8e637fe9abb3b2 | +| sixb | 3 | HC02_EST_VC17, HC02_EST_VC18, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/sixb?token=41c78f3729404638bc8e637fe9abb3b2 | +| sixc | 3 | percent_high_school_or_higher, percent_bachelors_or_higher, GEO_display_label | 
https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/sixc?token=41c78f3729404638bc8e637fe9abb3b2 | +| three | 3 | GEO_display_label, HD01_VD02, HD01_VD03 | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/three?token=41c78f3729404638bc8e637fe9abb3b2 | +| threeb | 3 | HD01_VD02, HD01_VD03, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/threeb?token=41c78f3729404638bc8e637fe9abb3b2 | +| threec | 3 | Not_Hispanic_or_Latino, Hispanic_or_Latino, GEO_display_label | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/threec?token=41c78f3729404638bc8e637fe9abb3b2 | +| two | 3 | GEO_display_label, HD02_VD02, HD01_VD03, HD01_VD04, HD01_VD05, HD01_VD06, HD0... | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/two?token=41c78f3729404638bc8e637fe9abb3b2 | +| twob | 3 | HD02_VD02, HD01_VD03, HD01_VD04, HD01_VD05, HD01_VD06, HD01_VD07, HD01_VD08, ... | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/twob?token=41c78f3729404638bc8e637fe9abb3b2 | +| twoc | 3 | White, Black, American_Indian_Alaska_Native, Asian, Native_Hawaiian_Pacific_I... | https://api.jenneranalytics.com/v1/run/r_019ed6c9446c73508ca19be36f2ace9d/datasets/twoc?token=41c78f3729404638bc8e637fe9abb3b2 | diff --git a/jenner-check/t003_clean_merge_acs/expected/log.txt b/jenner-check/t003_clean_merge_acs/expected/log.txt new file mode 100644 index 0000000..758aec2 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/expected/log.txt @@ -0,0 +1,219 @@ +Jenner 0.1.0 (Unlicensed - limited to 100 observations) +Get a license at https://jenneranalytics.com/license + +NOTE: Option OBS changed to 100. +NOTE: DATA One + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote One (3 rows, 3 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Two + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Two (3 rows, 8 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Three + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Three (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Four + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Four (3 rows, 7 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Five + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Five (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Six + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Six (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Seven + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Seven (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA Eight + +NOTE: Processing inline DATALINES (3 lines) + +NOTE: Read 3 rows from DATALINES. +NOTE: Wrote Eight (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA OneB + + +NOTE: Read 3 rows from One. +NOTE: Wrote OneB (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA OneC + + +NOTE: Read 3 rows from OneB. +NOTE: Wrote OneC (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA TwoB + + +NOTE: Read 3 rows from Two. +NOTE: Wrote TwoB (3 rows, 8 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA TwoC + + +NOTE: Read 3 rows from TwoB. +NOTE: Wrote TwoC (3 rows, 8 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA ThreeB + + +NOTE: Read 3 rows from Three. +NOTE: Wrote ThreeB (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA ThreeC + + +NOTE: Read 3 rows from ThreeB. +NOTE: Wrote ThreeC (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA FourB + + +NOTE: Read 3 rows from Four. +NOTE: Wrote FourB (3 rows, 7 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA FourC + + +NOTE: Read 3 rows from FourB. +NOTE: Wrote FourC (3 rows, 7 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA FiveB + + +NOTE: Read 3 rows from Five. +NOTE: Wrote FiveB (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA FiveC + + +NOTE: Read 3 rows from FiveB. +NOTE: Wrote FiveC (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA SixB + + +NOTE: Read 3 rows from Six. +NOTE: Wrote SixB (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA SixC + + +NOTE: Read 3 rows from SixB. +NOTE: Wrote SixC (3 rows, 3 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA SevenB + + +NOTE: Read 3 rows from Seven. +NOTE: Wrote SevenB (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA SevenC + + +NOTE: Read 3 rows from SevenB. +NOTE: Wrote SevenC (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA EightB + + +NOTE: Read 3 rows from Eight. +NOTE: Wrote EightB (3 rows, 2 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA EightC + + +NOTE: Read 3 rows from EightB. +NOTE: Wrote EightC (3 rows, 2 columns). 
+NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds +NOTE: DATA ACS + +NOTE: Stream 1 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 2 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 3 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 4 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 5 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 6 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 7 processed 3 rows, max BY-group size: 1 (O(1) memory verified) +NOTE: Stream 8 processed 3 rows, max BY-group size: 1 (O(1) memory verified) + +NOTE: Wrote ACS (3 rows, 23 columns). +NOTE: DATA elapsed: + wall 0.00 seconds + cpu 0.00 seconds diff --git a/jenner-check/t003_clean_merge_acs/expected/output.txt b/jenner-check/t003_clean_merge_acs/expected/output.txt new file mode 100644 index 0000000..e69de29 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B01001.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B01001.csv new file mode 100644 index 0000000..0286e27 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B01001.csv @@ -0,0 +1,4 @@ +GEO_display_label,HD01_VD02,HD01_VD26 +ZCTA5 00601,8809,8790 +ZCTA5 00602,19898,21587 +ZCTA5 00603,25286,28522 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B02001.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B02001.csv new file mode 100644 index 0000000..15ebcbc --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B02001.csv @@ -0,0 +1,4 @@ +GEO_display_label,HD02_VD02,HD01_VD03,HD01_VD04,HD01_VD05,HD01_VD06,HD01_VD07,HD01_VD08 +ZCTA5 00601,661,120,17,0,0,3641,135 +ZCTA5 00602,12085,1098,84,36,12,18430,540 +ZCTA5 00603,22356,2456,211,188,0,23980,957 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B03003.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B03003.csv new 
file mode 100644 index 0000000..699d000 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B03003.csv @@ -0,0 +1,4 @@ +GEO_display_label,HD01_VD02,HD01_VD03 +ZCTA5 00601,66,17533 +ZCTA5 00602,4116,37369 +ZCTA5 00603,8512,45296 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B08101.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B08101.csv new file mode 100644 index 0000000..501fac5 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_B08101.csv @@ -0,0 +1,4 @@ +GEO_display_label,HD01_VD10,HD01_VD18,HD01_VD26,HD01_VD34,HD01_VD42,HD01_VD50 +ZCTA5 00601,3048,124,0,209,42,54 +ZCTA5 00602,7821,312,233,1244,198,410 +ZCTA5 00603,12044,766,540,2188,410,622 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S0101.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S0101.csv new file mode 100644 index 0000000..354684b --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S0101.csv @@ -0,0 +1,4 @@ +GEO_display_label,HC01_EST_VC01 +ZCTA5 00601,17599 +ZCTA5 00602,41485 +ZCTA5 00603,53808 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S1501.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S1501.csv new file mode 100644 index 0000000..5c3fadb --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S1501.csv @@ -0,0 +1,4 @@ +GEO_display_label,HC02_EST_VC17,HC02_EST_VC18 +ZCTA5 00601,63.9,18.1 +ZCTA5 00602,70.2,21.4 +ZCTA5 00603,74.6,25.0 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S1701.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S1701.csv new file mode 100644 index 0000000..a537c70 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S1701.csv @@ -0,0 +1,4 @@ +GEO_display_label,HC03_EST_VC01 +ZCTA5 00601,64.3 +ZCTA5 00602,52.1 +ZCTA5 00603,48.7 diff --git a/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S2301.csv b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S2301.csv new file mode 
100644 index 0000000..810f281 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/input/ACS_17_5YR_S2301.csv @@ -0,0 +1,4 @@ +GEO_display_label,HC04_EST_VC01 +ZCTA5 00601,41.2 +ZCTA5 00602,33.0 +ZCTA5 00603,28.5 diff --git a/jenner-check/t003_clean_merge_acs/meta.json b/jenner-check/t003_clean_merge_acs/meta.json new file mode 100644 index 0000000..ece8f96 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/meta.json @@ -0,0 +1,8 @@ +{ + "bundle": "t003_clean_merge_acs", + "source_file": "Cleaning and Merging ACS Data v3.sas", + "source_blob_sha": "0c40aab58e2f39b978f12d8f8effbe3b3a230c5c", + "source_commit": "2202011eef6e7a99d22c7ea97efd0bc044c2f0bd", + "tier": "real_data", + "notes": "ACS Census table cleaning + merge pipeline. The 8 PROC IMPORTs of the author's local Census CSVs are replaced with inline DSD comma-delimited DATA steps carrying the same coded columns each step keeps (HD01_VD*, HC0*_EST_VC*), pre-sorted by GEO_display_label; trailing PROC EXPORT-to-xlsx omitted. All keep/rename steps and the final 8-way by-GEO merge are unchanged; ACS assembles to 3 joined rows." +} \ No newline at end of file diff --git a/jenner-check/t003_clean_merge_acs/script.sas b/jenner-check/t003_clean_merge_acs/script.sas new file mode 100644 index 0000000..f430577 --- /dev/null +++ b/jenner-check/t003_clean_merge_acs/script.sas @@ -0,0 +1,203 @@ +/* +Version Author Start Date Last Update +--- --------------- ------------ --------------- +1.0 Linh Duong 02/26/2020 02/26/2020 + +Jenner-check note: the only adaptations to the upstream "Cleaning and Merging ACS +Data v3.sas" are (a) the eight PROC IMPORTs of the author's local American Community +Survey CSVs are replaced with inline DATA steps that load a small sample carrying the +same Census-coded columns each step keeps (HD01_VD*, HC0*_EST_VC*, GEO_display_label), +so the bundle is self-contained and sorted by GEO_display_label for the merge, and +(b) the trailing PROC EXPORT-to-xlsx (local disk) is omitted. 
Every keep, rename, and +the final by-GEO merge that assembles the analysis dataset are unchanged. +*/ + +*Inline samples One to Eight - comma-delimited rows read with DSD, each carrying GEO_display_label plus the Census-coded columns used by the later keep and rename steps; +data One; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HD01_VD02 HD01_VD26; + datalines; +ZCTA5 00601,8809,8790 +ZCTA5 00602,19898,21587 +ZCTA5 00603,25286,28522 +; +run; + +data Two; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HD02_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08; + datalines; +ZCTA5 00601,661,120,17,0,0,3641,135 +ZCTA5 00602,12085,1098,84,36,12,18430,540 +ZCTA5 00603,22356,2456,211,188,0,23980,957 +; +run; + +data Three; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HD01_VD02 HD01_VD03; + datalines; +ZCTA5 00601,66,17533 +ZCTA5 00602,4116,37369 +ZCTA5 00603,8512,45296 +; +run; + +data Four; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HD01_VD10 HD01_VD18 HD01_VD26 HD01_VD34 HD01_VD42 HD01_VD50; + datalines; +ZCTA5 00601,3048,124,0,209,42,54 +ZCTA5 00602,7821,312,233,1244,198,410 +ZCTA5 00603,12044,766,540,2188,410,622 +; +run; + +data Five; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HC01_EST_VC01; + datalines; +ZCTA5 00601,17599 +ZCTA5 00602,41485 +ZCTA5 00603,53808 +; +run; + +data Six; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HC02_EST_VC17 HC02_EST_VC18; + datalines; +ZCTA5 00601,63.9,18.1 +ZCTA5 00602,70.2,21.4 +ZCTA5 00603,74.6,25.0 +; +run; + +data Seven; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HC03_EST_VC01; + datalines; +ZCTA5 00601,64.3 +ZCTA5 00602,52.1 +ZCTA5 00603,48.7 +; +run; + +data Eight; + length GEO_display_label $ 32; + infile datalines dsd dlm=','; + input GEO_display_label $ HC04_EST_VC01; + datalines; +ZCTA5 00601,41.2 +ZCTA5 00602,33.0 +ZCTA5 00603,28.5 +; +run; + 
+ +*Each table goes through two steps - the B step keeps the needed columns and the C step gives them readable names; +*The renames have to happen before the merge because HD01_VD02, HD01_VD03 and HD01_VD26 each occur in more than one input table; +Data OneB; +Set One; +keep HD01_VD02 HD01_VD26 GEO_display_label; +run; + +Data OneC; +Set OneB; +rename HD01_VD02 = Male; +rename HD01_VD26 = Female; +run; + +Data TwoB; +Set Two; +keep HD02_VD02 HD01_VD03 HD01_VD04 HD01_VD05 HD01_VD06 HD01_VD07 HD01_VD08 GEO_display_label; +run; + +Data TwoC; +Set TwoB; +rename HD02_VD02 = White; +rename HD01_VD03 = Black; +rename HD01_VD04 = American_Indian_Alaska_Native; +rename HD01_VD05 = Asian; +rename HD01_VD06 = Native_Hawaiian_Pacific_Islander; +rename HD01_VD07 = Other_Race; +rename HD01_VD08 = Two_Or_More_Races; +run; + +Data ThreeB; +Set Three; +keep HD01_VD02 HD01_VD03 GEO_display_label; +run; + +Data ThreeC; +Set ThreeB; +rename HD01_VD02 = Not_Hispanic_or_Latino; +rename HD01_VD03 = Hispanic_or_Latino; +run; + +Data FourB; +Set Four; +keep HD01_VD10 HD01_VD18 HD01_VD26 HD01_VD34 HD01_VD42 HD01_VD50 GEO_display_label; +run; + +Data FourC; +Set FourB; +rename HD01_VD10 = drove_car_or_truck_or_van; +rename HD01_VD18 = carpool_car_or_truck_or_van; +rename HD01_VD26 = public_transport_exclude_taxi; +rename HD01_VD34 = walked; +rename HD01_VD42 = Taxi_motorcycle_bicycle_other; +rename HD01_VD50 = worked_at_home; +run; + +Data FiveB; +Set Five; +keep HC01_EST_VC01 GEO_display_label; +run; + +Data FiveC; +Set FiveB; +rename HC01_EST_VC01 = total_pop; +run; + +Data SixB; +Set Six; +keep HC02_EST_VC17 HC02_EST_VC18 GEO_display_label; +run; + +Data SixC; +Set SixB; +rename HC02_EST_VC17 = percent_high_school_or_higher; +rename HC02_EST_VC18 = percent_bachelors_or_higher; +run; + +Data SevenB; +Set Seven; +keep HC03_EST_VC01 GEO_display_label; +run; + +Data SevenC; +Set SevenB; +rename HC03_EST_VC01 = percent_below_poverty_level; +run; + +Data EightB; +Set Eight; +keep HC04_EST_VC01 GEO_display_label; +run; + +Data EightC; +Set EightB; +rename HC04_EST_VC01 = unemployment_rate_16_older; +run; + +*Merge Data; + +*Match-merge of the eight renamed tables on GEO_display_label - every input must already be in GEO_display_label order; +Data ACS; +Merge OneC TwoC ThreeC FourC FiveC SixC SevenC EightC; +by GEO_display_label; +run;