Skip to content

Commit f5900c7

Browse files
authored
Who Treatment Outcome TB and HIV (#1937)
* Who Treatment Outcome TB and HIV * resolved gemini review * internal comments * resolved core team comments
1 parent 1539f93 commit f5900c7

8 files changed

Lines changed: 1142 additions & 0 deletions

File tree

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# WHO Treatment Outcome for TB and HIV
2+
3+
- source: https://data.who.int/indicators/i/DCDC2EB/625E736
4+
5+
- description: Percentage of people with TB/HIV who started drug-susceptible tuberculosis treatment and whose treatment outcome was recorded as treatment success (cured or treatment completed), treatment failed, died, lost to follow-up, or not evaluated, within the reporting period.
6+
7+
- type of place: Country Data
8+
9+
- statvars: Health
10+
11+
- years: 2012 to 2023
12+
13+
- place_resolution: manually.
14+
15+
### Release Frequency: P1Y
16+
17+
### How to run:
18+
19+
- To download the input file
20+
21+
`python3 tb_data_download_who.py`
22+
23+
- To process the input file
24+
25+
`python3 ../../../tools/statvar_importer/stat_var_processor.py --input_data=input_files/Tuberculosis_outcome_TB_HIV.csv --pv_map=tuberculosis_outcome_pvmap.csv --config_file=metadata.csv --output_path=output/tuberculosis_hiv_output --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf`
26+
27+
#### Refresh type: Fully Autorefresh
28+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"import_specifications": [
3+
{
4+
"import_name": "WHO_TuberculosisAndHIVTreatmentOutcome",
5+
"curator_emails": [
6+
"support@datacommons.org"
7+
],
8+
"provenance_url": "https://data.who.int/indicators/i/DCDC2EB/625E736",
9+
"provenance_description": "Percentage of people with TB/HIV who started drug-susceptible TB treatment and whose treatment outcome was recorded as treatment success (cured or treatment completed), treatment failed, died, lost to follow-up, or not evaluated, within the reporting period.",
10+
"scripts": [
11+
"tb_data_download_who.py",
12+
"../../../tools/statvar_importer/stat_var_processor.py --input_data=input_files/Tuberculosis_outcome_TB_HIV.csv --pv_map=tuberculosis_outcome_pvmap.csv --config_file=metadata.csv --output_path=output/tuberculosis_hiv_output --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"
13+
],
14+
"import_inputs": [
15+
{
16+
"template_mcf": "output/tuberculosis_hiv_output.tmcf",
17+
"cleaned_csv": "output/tuberculosis_hiv_output.csv"
18+
}
19+
],
20+
"source_files": [
21+
"input_files/Tuberculosis_outcome_TB_HIV.csv"
22+
],
23+
"cron_schedule": "0 10 10,21 * *"
24+
}
25+
]
26+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
config,value
2+
mapped_rows,1
3+
mapped_columns,6
4+
output_columns,"observationDate,observationAbout,variableMeasured,value,unit,scalingFactor"
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import os
2+
import requests
3+
import io
4+
import pandas as pd
5+
import logging
6+
7+
# Configure logging
8+
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
9+
10+
def download_who_data():
    """Download the WHO TB/HIV treatment-outcome indicator and save it as CSV.

    Fetches the indicator rows from the WHO xMart API, looks up each
    country's iso3 code in the WHO TB notifications master CSV (joined on
    the country name), and writes the combined table to
    ``input_files/Tuberculosis_outcome_TB_HIV.csv``.

    Returns:
        None. When either endpoint answers with a non-200 status the failure
        is logged and the function returns without writing a file.

    Raises:
        requests.exceptions.RequestException: On a connection failure or when
            a request exceeds the timeout.
        KeyError: If the API response lacks one of the expected output
            columns.
    """
    # Without a timeout, requests waits forever on a stalled server, which
    # would hang an unattended refresh job.
    request_timeout = 120  # seconds

    # 1. Get the clean data from the API using the indicator ID.
    api_url = "https://xmart-api-public.who.int/DATA_/RELAY_TB_DATA"
    params = {
        "$filter": "IND_ID eq 'DCDC2EB625E736'",
        "$format": "csv",
    }

    logging.info("1. Fetching clean percentage data from WHO API...")
    api_response = requests.get(api_url, params=params, timeout=request_timeout)

    if api_response.status_code != 200:
        # Logged at the same severity as the master-data failure below; at
        # INFO level this was easy to miss in pipeline logs.
        logging.critical(f"Failed to fetch API data. HTTP {api_response.status_code}")
        return

    # Load the clean API data into a pandas table.
    api_df = pd.read_csv(io.StringIO(api_response.text))

    # 2. Get ONLY the iso3 code from the master database.
    logging.info("2. Fetching country iso3 codes from WHO master database...")
    master_url = "https://extranet.who.int/tme/generateCSV.asp?ds=notifications"
    master_response = requests.get(master_url, timeout=request_timeout)
    if master_response.status_code != 200:
        logging.critical(f"Failed to fetch master data. HTTP {master_response.status_code}")
        return

    # Only the 'country' (join key) and 'iso3' columns are needed.
    geo_columns = ['country', 'iso3']
    master_df = pd.read_csv(io.StringIO(master_response.text),
                            usecols=geo_columns).drop_duplicates()

    # 3. Merge the two datasets together based on the country name.
    logging.info("3. Merging data and formatting...")
    # The API uses uppercase 'COUNTRY', the master uses lowercase 'country'.
    merged_df = pd.merge(api_df, master_df, left_on='COUNTRY',
                         right_on='country', how='left')

    # Drop the duplicate lowercase 'country' column used for joining.
    merged_df = merged_df.drop(columns=['country'])

    # A left join keeps API rows whose name has no match in the master file,
    # leaving iso3 empty; report them so they are not lost silently.
    unmatched = merged_df.loc[merged_df['iso3'].isna(), 'COUNTRY'].dropna().unique()
    if len(unmatched) > 0:
        logging.warning(
            "No iso3 code found for %d country name(s): %s",
            len(unmatched),
            ", ".join(sorted(str(name) for name in unmatched)),
        )

    # Reorder columns so the iso3 code sits right next to the country name.
    final_columns = [
        'IND_ID', 'INDICATOR_NAME', 'DISAGGR_1', 'YEAR', 'COUNTRY', 'iso3', 'VALUE'
    ]
    merged_df = merged_df[final_columns]

    # 4. Save to CSV in the input folder.
    output_dir = "input_files"
    filename = os.path.join(output_dir, "Tuberculosis_outcome_TB_HIV.csv")

    os.makedirs(output_dir, exist_ok=True)

    # Save without the pandas index column.
    merged_df.to_csv(filename, index=False)
    logging.info(f"Success! Data saved locally as '{filename}'")
65+
66+
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    download_who_data()

0 commit comments

Comments
 (0)