Skip to content

Commit 999afb4

Browse files
modified manifest and added run.sh for autorefresh (#1910)
* modified manifest and added run.sh for autorefresh * Update statvar_imports/france_demographics/run.sh Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * refactored for loop --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 997da0a commit 999afb4

3 files changed

Lines changed: 77 additions & 16 deletions

File tree

statvar_imports/france_demographics/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,22 @@ This directory contains the scripts and configuration to import France demograph
88
- **URL**: [INSEE Population Statistics](https://www.insee.fr/en/statistiques/8333211?sommaire=8333329)
99

1010
## Directory Structure
11-
- `download_input_data.py`: Script to download source Excel files from INSEE.
11+
- `run.sh`: Primary entry point. Orchestrates downloading, renaming files, and processing.
1212
- `*_pvmap.csv`: Property-Value mapping files for the statvar processor.
1313
- `france_demographics_metadata.csv`: Metadata configuration for the import.
14-
- `manifest.json`: Import specification.
14+
- `manifest.json`: Import specification for the automation pipeline.
1515

1616
## Usage
1717

18-
### 1. Download Data
19-
Run the download script to fetch the latest data files into `input_files/`.
18+
### 1. Automated Execution
19+
To ensure the pipeline works consistently across local environments and Google Cloud Batch, use the `run.sh` script. It downloads the INSEE files, renames them to the names the configuration expects, and runs `stat_var_processor.py` on each dataset.
2020

21-
```bash
22-
python3 download_input_data.py
21+
```bash
chmod +x run.sh
22+
./run.sh
2323
```
2424

25-
### 2. Process Data
26-
Run the `stat_var_processor.py` tool for each dataset to generate MCF and CSV files.
25+
### 2. Manual Processing
26+
If you already have the data files downloaded and renamed in `input_files/`, you can run the `stat_var_processor.py` tool for each dataset to generate MCF and CSV files.
2727

2828
**Annual Population Components:**
2929
```bash

statvar_imports/france_demographics/manifest.json

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,7 @@
88
"provenance_url": "https://www.insee.fr/en/statistiques/8333211?sommaire=8333329",
99
"provenance_description": "Population data for France from INSEE",
1010
"scripts": [
11-
"../../util/download_util_script.py --download_url=https://www.insee.fr/en/statistiques/fichier/8333211/Pop_annu_compo_evol_va.xlsx --output_folder=input_files/ && mv input_files/Pop_annu_compo_evol_va.xlsx input_files/annual_population_components.xlsx",
12-
"../../util/download_util_script.py --download_url=https://www.insee.fr/en/statistiques/fichier/8333211/Pop1janv_age_va.xlsx --output_folder=input_files/ && mv input_files/Pop1janv_age_va.xlsx input_files/population_sex_detailed_age.xlsx",
13-
"../../util/download_util_script.py --download_url=https://www.insee.fr/en/statistiques/fichier/8333211/Pop1janv_grages_va.xlsx --output_folder=input_files/ && mv input_files/Pop1janv_grages_va.xlsx input_files/population_sex_age_groups.xlsx",
14-
"../../util/download_util_script.py --download_url=https://www.insee.fr/en/statistiques/fichier/8333211/Pop_age_moyen_median_va.xlsx --output_folder=input_files/ && mv input_files/Pop_age_moyen_median_va.xlsx input_files/average_median_age.xlsx",
15-
"../../tools/statvar_importer/stat_var_processor.py --input_data=./input_files/annual_population_components.xlsx --pv_map=annual_population_components_pvmap.csv --config_file=france_demographics_metadata.csv --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf --output_path=output/annual_population_components_output",
16-
"../../tools/statvar_importer/stat_var_processor.py --input_data=./input_files/population_sex_detailed_age.xlsx --pv_map=population_sex_detailed_age_pvmap.csv --config_file=france_demographics_metadata.csv --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf --output_path=output/population_sex_detailed_age_output",
17-
"../../tools/statvar_importer/stat_var_processor.py --input_data=./input_files/population_sex_age_groups.xlsx --pv_map=population_sex_age_groups_pvmap.csv --config_file=france_demographics_metadata.csv --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf --output_path=output/population_sex_age_groups_output",
18-
"../../tools/statvar_importer/stat_var_processor.py --input_data=./input_files/average_median_age.xlsx --pv_map=average_median_age_pvmap.csv --config_file=france_demographics_metadata.csv --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf --output_path=output/average_median_age_output"
11+
"run.sh"
1912
],
2013
"import_inputs": [
2114
{
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/usr/bin/env bash
#
# Entry point for the France demographics import: downloads the INSEE source
# workbooks into input_files/, renames them, and runs the statvar processor
# on each dataset, writing results under output/.
#
# Usage: ./run.sh
# Requires bash 4+ (the script uses an associative array), which is why the
# interpreter is looked up on PATH instead of hard-coding /bin/bash.

# Abort on any failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Every path used below (../../util, ../../tools, input_files/, the pvmap and
# metadata CSVs) is relative to this script's directory, so switch there
# first; the script then works no matter where it is invoked from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

echo "Starting France Demographics Import..."

mkdir -p input_files output
10+
11+
#######################################
# Download one source file and store it under its canonical name.
# Arguments:
#   $1 - URL of the file to download; the downloaded file is expected to
#        appear in input_files/ under the URL's last path component
#   $2 - file name to give the download inside input_files/
# Outputs:
#   Progress messages on stdout; an error message on stderr when the
#   expected file is missing after the download
# Returns:
#   0 on success; non-zero if the download command fails, the expected file
#   is not present afterwards, or the rename fails
#######################################
download_and_rename() {
  local url=$1
  local final_name=$2
  # Declared separately from the assignment so that a failing `basename` is
  # not masked by the (always zero) exit status of `local`.
  local temp_name
  temp_name=$(basename -- "$url") || return

  echo "Downloading $temp_name..."
  # Propagate a download failure explicitly; `set -e` is not honoured when
  # the function is called from a conditional context.
  python3 ../../util/download_util_script.py \
    --download_url="$url" --output_folder=input_files/ || return

  # Fail with a clear message instead of a bare `mv` error when the download
  # step exited 0 but did not leave the expected file behind.
  if [[ ! -f "input_files/$temp_name" ]]; then
    echo "Error: expected input_files/$temp_name after downloading $url" >&2
    return 1
  fi

  echo "Renaming $temp_name to $final_name..."
  mv -- "input_files/$temp_name" "input_files/$final_name"
}
22+
23+
# INSEE directory that hosts the source workbooks. Each key in the table
# below is a file name under that directory; each value is the name the file
# is given inside input_files/.
BASE_URL="https://www.insee.fr/en/statistiques/fichier/8333211"
declare -A files_to_download=(
  ["Pop_annu_compo_evol_va.xlsx"]="annual_population_components.xlsx"
  ["Pop1janv_age_va.xlsx"]="population_sex_detailed_age.xlsx"
  ["Pop1janv_grages_va.xlsx"]="population_sex_age_groups.xlsx"
  ["Pop_age_moyen_median_va.xlsx"]="average_median_age.xlsx"
)

# Fetch every source file and store it under its mapped name.
for remote_name in "${!files_to_download[@]}"; do
  download_and_rename \
    "${BASE_URL}/${remote_name}" \
    "${files_to_download[$remote_name]}"
done

# List the directory so the log shows what is present after the downloads.
echo "Verifying downloaded files:"
ls -lh input_files/
39+
# Settings shared by every stat_var_processor invocation.
MCF_PATH="gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"
CONFIG="france_demographics_metadata.csv"
STATVAR_PROCESSOR="../../tools/statvar_importer/stat_var_processor.py"

#######################################
# Run the statvar processor on one workbook from input_files/.
# Globals:
#   STATVAR_PROCESSOR, CONFIG, MCF_PATH (read)
# Arguments:
#   $1 - input file name inside input_files/
#   $2 - path of the property-value map CSV to use
#   $3 - output path prefix, placed under output/
# Outputs:
#   A progress line on stdout, followed by whatever the processor prints
# Returns:
#   Exit status of the processor command
#######################################
run_processor() {
  local input=$1
  local pvmap=$2
  local out=$3

  # Collect the flags in an array so each one stays a single, intact word.
  local -a processor_args=(
    "--input_data=input_files/$input"
    "--pv_map=$pvmap"
    "--config_file=$CONFIG"
    "--existing_statvar_mcf=$MCF_PATH"
    "--output_path=output/$out"
  )

  echo "Processing $input..."
  python3 "$STATVAR_PROCESSOR" "${processor_args[@]}"
}
56+
57+
# Dataset base names. For each <name> the loop below processes
# input_files/<name>.xlsx with <name>_pvmap.csv and writes its results
# under output/ with the prefix <name>_output.
datasets=(
  "annual_population_components"
  "population_sex_detailed_age"
  "population_sex_age_groups"
  "average_median_age"
)

for name in "${datasets[@]}"; do
  workbook="${name}.xlsx"
  pv_map="${name}_pvmap.csv"
  output_prefix="${name}_output"
  run_processor "$workbook" "$pv_map" "$output_prefix"
done

echo "Import completed successfully."

0 commit comments

Comments
 (0)