Skip to content

Commit 84edab8

Browse files
authored
Merge pull request #414 from coderxio/rxclass-fix
RxClass fix
2 parents 8bf6a75 + bb2ccc8 commit 84edab8

11 files changed

Lines changed: 924 additions & 13 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ We would love to see you contribute to SageRx. Join our [Slack](https://join.sla
6060

6161
Running `docker-compose up` creates a dbt container that is used for CLI commands. To enter commands, run `docker exec -it dbt /bin/bash`. This places you in a bash session inside the dbt container, where you can run dbt commands as you normally would.
6262

63-
To serve dbt documentation locally, enter the commands in the dbt container `dbt docs generate` then `dbt docs serve --port 8081`. They should generate on `http://localhost:8081`
63+
To serve the dbt documentation locally, run `dbt docs generate` and then `dbt docs serve --host 0.0.0.0 --port 8081` inside the dbt container. The documentation should then be available at `http://localhost:8081`.
6464

6565
### Integrating with AWS
6666

airflow/dags/rxclass/dag_tasks.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ def load(file_path_str:str):
4848

4949
classes = []
5050
for result in results:
51+
# skip a result if it is None
52+
if result is None:
53+
continue
5154
response = result['response']
5255
if 'rxclassDrugInfoList' in response:
5356
for drug_info in response["rxclassDrugInfoList"]["rxclassDrugInfo"]:

airflow/dags/rxnorm_historical/dag_tasks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def load(file_path_str:str):
4545
# Initialize a list to store the processed data
4646
records = []
4747
for result in results:
48-
if not len(result['response']) == 0:
48+
if result is not None and not len(result['response']) == 0:
4949
response = result['response']
5050
if 'historicalNdcConcept' in response:
5151
url = result['url']
@@ -72,6 +72,7 @@ def load(file_path_str:str):
7272
# Create a single DataFrame from the list of dictionaries
7373
df = pd.DataFrame.from_records(records)
7474
print(f'Processed {len(df)} RXCUIs.')
75+
print(df.head(10))
7576

7677
# Load the final DataFrame into the database
7778
load_df_to_pg(df, "sagerx_lake", "rxnorm_historical", "replace", index=False, create_index=True, index_columns=['ndc', 'end_date'])

dbt/sagerx/models/marts/classification/clinical_products_to_diseases.sql renamed to dbt/sagerx/models/marts/clinical_products_to_diseases.sql

File renamed without changes.
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
version: 2
2+
3+
models:
4+
- name: stg_cms_hcris__hospital
5+
description: >
6+
Staging model for hospital data from the CMS Healthcare Cost Report Information System (HCRIS).
7+
This model joins the report (rpt), alpha (alphanumeric), and numeric (nmrc) source tables to create
8+
a normalized hospital record with provider information, ownership details, address, bed counts,
9+
resident counts, and financial metrics. The model extracts data from specific worksheet codes and
10+
line/column combinations to build a comprehensive hospital profile from the HCRIS cost report data.
11+
12+
columns:
13+
- name: cms_id
14+
description: >
15+
CMS Provider Number. A six-digit identifier where the first two digits represent the state code
16+
and the remaining four digits represent the assigned provider range. This is the primary identifier
17+
for hospitals in the CMS system.
18+
tests:
19+
- not_null
20+
21+
- name: npi
22+
description: >
23+
National Provider Identifier. A unique 10-digit identifier for healthcare providers established
24+
under HIPAA. May be null for some hospitals that haven't registered an NPI.
25+
26+
- name: ownership_code
27+
description: >
28+
Provider Control Type Code. A numeric code (1-13) indicating the type of ownership structure
29+
from Table 3A of the HCRIS specifications. Used to categorize hospitals by ownership type.
30+
tests:
31+
- accepted_values:
32+
values: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13']
33+
34+
- name: ownership_type
35+
description: >
36+
Human-readable ownership type description. Maps ownership codes to descriptive categories:
37+
voluntary nonprofit (church/other), proprietary (individual/corporation/partnership/other),
38+
governmental (federal/city-county/county/state/hospital district/city/other), or unknown.
39+
tests:
40+
- not_null
41+
- accepted_values:
42+
values:
43+
- 'voluntary nonprofit-church'
44+
- 'voluntary nonprofit-other'
45+
- 'proprietary-individual'
46+
- 'proprietary-corporation'
47+
- 'proprietary-partnership'
48+
- 'proprietary-other'
49+
- 'governmental-federal'
50+
- 'governmental-city-county'
51+
- 'governmental-county'
52+
- 'governmental-state'
53+
- 'governmental-hospital district'
54+
- 'governmental-city'
55+
- 'governmental-other'
56+
- 'unknown'
57+
58+
- name: hospital_name
59+
description: >
60+
Hospital name extracted from worksheet S200001, line 00300, column 00100. The official name
61+
of the hospital as reported in the HCRIS cost report.
62+
63+
- name: street_address
64+
description: >
65+
Street address of the hospital extracted from worksheet S200001, line 00100, column 00100.
66+
The physical street address where the hospital is located.
67+
68+
- name: city
69+
description: >
70+
City where the hospital is located, extracted from worksheet S200001, line 00200, column 00100.
71+
The city component of the hospital's address.
72+
73+
- name: state
74+
description: >
75+
State where the hospital is located, extracted from worksheet S200001, line 00200, column 00200.
76+
The state component of the hospital's address, typically a two-letter state code.
77+
78+
- name: zip
79+
description: >
80+
ZIP code where the hospital is located, extracted from worksheet S200001, line 00200, column 00300.
81+
The postal code component of the hospital's address.
82+
83+
- name: hosp_bed_count
84+
description: >
85+
Number of hospital beds, extracted from worksheet S300001, line 01400, column 00200.
86+
Represents the total number of licensed hospital beds available for patient care.
87+
88+
- name: ld_bed_count
89+
description: >
90+
Number of long-term care (LTC) beds, extracted from worksheet S300001, line 03200, column 00200.
91+
Represents beds designated for long-term care services within the hospital facility.
92+
93+
- name: total_bed_count
94+
description: >
95+
Total bed count calculated as the sum of hospital beds and long-term care beds.
96+
Uses COALESCE to handle null values by treating them as 0. This provides a comprehensive
97+
count of all beds in the facility.
98+
99+
- name: resident_count
100+
description: >
101+
Number of residents (medical residents), extracted from worksheet S300001, line 01400, column 00900.
102+
Represents the count of medical residents training at the hospital, which may affect Medicare
103+
reimbursement calculations.
104+
105+
- name: total_revenue
106+
description: >
107+
Total revenue for the hospital, extracted from worksheet G300000, line 00200, column 00100.
108+
Represents the total revenue reported in the cost report, including all sources of income.
109+
110+
- name: net_revenue
111+
description: >
112+
Net revenue for the hospital, extracted from worksheet G300000, line 00300, column 00100.
113+
Represents the net revenue after deductions, as reported in the HCRIS cost report.
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
version: 2
2+
3+
sources:
4+
- name: cms_hcris
5+
schema: sagerx_lake
6+
description: Healthcare Cost Report Information System (HCRIS) data tables
7+
tables:
8+
- name: cms_hcris_rpt
9+
description: Report table containing cost report information
10+
columns:
11+
- name: rpt_rec_num
12+
description: Report Record Number. HCRIS assigned cost report specific number.
13+
data_type: string
14+
tests:
15+
- not_null
16+
17+
- name: prvdr_num
18+
description: Provider Number. Valid Provider Number defined as follows xxyyyy where xx = State Code and yyyy = Assigned Provider Range
19+
data_type: string
20+
tests:
21+
- not_null
22+
23+
- name: rpt_stus_cd
24+
description: Report Status Code. Type of cost report.
25+
data_type: string
26+
tests:
27+
- not_null
28+
- accepted_values:
29+
values: ['1', '2', '3', '4', '5']
30+
31+
- name: npi
32+
description: National Provider Identifier. Unique health identifier for health care providers. Established under HIPAA.
33+
data_type: string
34+
35+
- name: fy_bgn_dt
36+
description: Fiscal Year Begin Date. Cost Report Fiscal Year beginning date.
37+
data_type: date
38+
39+
- name: fy_end_dt
40+
description: Fiscal Year End Date. Cost Report Fiscal Year ending date.
41+
data_type: date
42+
43+
- name: proc_dt
44+
description: Process Date. The date the cost report was processed into HCRIS.
45+
data_type: date
46+
47+
- name: initl_rpt_sw
48+
description: Initial Report Switch. Y or N, Y = the first cost report filed for this provider. (Not actively used.)
49+
data_type: string
50+
51+
- name: last_rpt_sw
52+
description: Last Report Switch. Y or N, Y = the final cost report filed for this provider. (Not actively used.)
53+
data_type: string
54+
55+
- name: trnsmtl_num
56+
description: The current transmittal or version number in effect for each sub-system. Transmittal Number or transmittal version used to create the cost report
57+
data_type: string
58+
59+
- name: fi_num
60+
description: Fiscal Intermediary Number. Fiscal Intermediary Number in effect at the time of cost report filing.
61+
data_type: string
62+
63+
- name: adr_vndr_cd
64+
description: Automated Desk Review Vendor Code. Vendor for Fiscal Intermediary.
65+
data_type: string
66+
67+
- name: fi_rcpt_dt
68+
description: Fiscal Intermediary Receipt Date. Date cost report was received by Fiscal Intermediary.
69+
data_type: date
70+
71+
- name: util_cd
72+
description: Utilization Code. Level of Medicare utilization of filed cost report.
73+
data_type: string
74+
75+
- name: npr_dt
76+
description: Notice of Program Reimbursement Date. Date Provider received NPR.
77+
data_type: date
78+
79+
- name: spec_ind
80+
description: Special Indicator. HCRIS code used for special purposes.
81+
data_type: string
82+
83+
- name: fi_creat_dt
84+
description: Fiscal Intermediary Create Date. Date the FI created the HCRIS file.
85+
data_type: date
86+
87+
- name: prvdr_ctrl_type_cd
88+
description: Provider Control Type Code. Type of ownership from Table 3A of Specifications.
89+
data_type: string
90+
91+
- name: cms_hcris_alpha
92+
description: Alpha table containing provider reported alphanumeric data
93+
columns:
94+
- name: rpt_rec_num
95+
description: Report Record Number. HCRIS assigned cost report specific number.
96+
data_type: string
97+
tests:
98+
- not_null
99+
100+
- name: wksht_cd
101+
description: Worksheet Identifier. Valid worksheets are defined for each subsystem in other documentation.
102+
data_type: string
103+
tests:
104+
- not_null
105+
106+
- name: line_num
107+
description: Line Number. Valid Line Number defined as follows xxxyy where xxx = Line Number and yy = Sub-Line Number
108+
data_type: string
109+
tests:
110+
- not_null
111+
112+
- name: clmn_num
113+
description: Column Number. Valid Column Number defined as follows xxxyy where xxx = Column Number and yy = Sub-Column Number
114+
data_type: string
115+
tests:
116+
- not_null
117+
118+
- name: alphnmrc_itm_txt
119+
description: Alphanumeric Item Text. Provider reported alpha data.
120+
data_type: string
121+
tests:
122+
- not_null
123+
124+
- name: cms_hcris_nmrc
125+
description: Numeric table containing provider reported numeric data
126+
columns:
127+
- name: rpt_rec_num
128+
description: Report Record Number. HCRIS assigned cost report specific number.
129+
data_type: string
130+
tests:
131+
- not_null
132+
133+
- name: wksht_cd
134+
description: Worksheet Identifier. Valid worksheets are defined for each subsystem in other documentation.
135+
data_type: string
136+
tests:
137+
- not_null
138+
139+
- name: line_num
140+
description: Line Number. Valid Line Number defined as follows xxxyy where xxx = Line Number and yy = Sub-Line Number
141+
data_type: string
142+
tests:
143+
- not_null
144+
145+
- name: clmn_num
146+
description: Column Number. Valid Column Number defined as follows xxxyy where xxx = Column Number and yy = Sub-Column Number
147+
data_type: string
148+
tests:
149+
- not_null
150+
151+
- name: itm_val_num
152+
description: Item Value Number. Provider reported numeric data.
153+
data_type: numeric
154+
tests:
155+
- not_null

dbt/sagerx/models/staging/cms_hcris/stg_cms_hcris__hospital.sql

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,23 @@
11
with
22

3+
rpt as (
4+
5+
select * from {{ source('cms_hcris', 'cms_hcris_rpt') }}
6+
7+
),
8+
9+
alpha as (
10+
11+
select * from {{ source('cms_hcris', 'cms_hcris_alpha') }}
12+
13+
),
14+
15+
nmrc as (
16+
17+
select * from {{ source('cms_hcris', 'cms_hcris_nmrc') }}
18+
19+
),
20+
321
final as (
422

523
select
@@ -33,53 +51,53 @@ final as (
3351
residents.itm_val_num as resident_count,
3452
revenue.itm_val_num as total_revenue,
3553
net_rev.itm_val_num as net_revenue
36-
from sagerx_lake.cms_hcris_rpt rpt
54+
from rpt
3755
left join sagerx_lake.cms_hcris_alpha name
3856
on rpt.rpt_rec_num = name.rpt_rec_num
3957
and name.wksht_cd = 'S200001'
4058
and name.line_num = '00300'
4159
and name.clmn_num = '00100'
42-
left join sagerx_lake.cms_hcris_alpha street
60+
left join alpha street
4361
on rpt.rpt_rec_num = street.rpt_rec_num
4462
and street.wksht_cd = 'S200001'
4563
and street.line_num = '00100'
4664
and street.clmn_num = '00100'
47-
left join sagerx_lake.cms_hcris_alpha city
65+
left join alpha city
4866
on rpt.rpt_rec_num = city.rpt_rec_num
4967
and city.wksht_cd = 'S200001'
5068
and city.line_num = '00200'
5169
and city.clmn_num = '00100'
52-
left join sagerx_lake.cms_hcris_alpha state
70+
left join alpha state
5371
on rpt.rpt_rec_num = state.rpt_rec_num
5472
and state.wksht_cd = 'S200001'
5573
and state.line_num = '00200'
5674
and state.clmn_num = '00200'
57-
left join sagerx_lake.cms_hcris_alpha zip
75+
left join alpha zip
5876
on rpt.rpt_rec_num = zip.rpt_rec_num
5977
and zip.wksht_cd = 'S200001'
6078
and zip.line_num = '00200'
6179
and zip.clmn_num = '00300'
62-
left join sagerx_lake.cms_hcris_nmrc hosp_beds
80+
left join nmrc hosp_beds
6381
on rpt.rpt_rec_num = hosp_beds.rpt_rec_num
6482
and hosp_beds.wksht_cd = 'S300001'
6583
and hosp_beds.line_num = '01400'
6684
and hosp_beds.clmn_num = '00200'
67-
left join sagerx_lake.cms_hcris_nmrc ld_beds
85+
left join nmrc ld_beds
6886
on rpt.rpt_rec_num = ld_beds.rpt_rec_num
6987
and ld_beds.wksht_cd = 'S300001'
7088
and ld_beds.line_num = '03200'
7189
and ld_beds.clmn_num = '00200'
72-
left join sagerx_lake.cms_hcris_nmrc residents
90+
left join nmrc residents
7391
on rpt.rpt_rec_num = residents.rpt_rec_num
7492
and residents.wksht_cd = 'S300001'
7593
and residents.line_num = '01400'
7694
and residents.clmn_num = '00900'
77-
left join sagerx_lake.cms_hcris_nmrc revenue
95+
left join nmrc revenue
7896
on rpt.rpt_rec_num = revenue.rpt_rec_num
7997
and revenue.wksht_cd = 'G300000'
8098
and revenue.line_num = '00200'
8199
and revenue.clmn_num = '00100'
82-
left join sagerx_lake.cms_hcris_nmrc net_rev
100+
left join nmrc net_rev
83101
on rpt.rpt_rec_num = net_rev.rpt_rec_num
84102
and net_rev.wksht_cd = 'G300000'
85103
and net_rev.line_num = '00300'

0 commit comments

Comments
 (0)