|
| 1 | +import csv |
1 | 2 | import datetime as dt |
| 3 | +import hashlib |
| 4 | +import os |
| 5 | +import shutil |
2 | 6 | from typing import List, Optional |
3 | 7 |
|
4 | 8 | from mapswipe_workers import auth |
5 | 9 | from mapswipe_workers.definitions import DATA_PATH, logger |
6 | 10 | from mapswipe_workers.generate_stats import overall_stats, project_stats |
7 | 11 |
|
8 | 12 |
|
def generate_data_for_mapswipe_website():
    """Generate aggregate data files consumed by the MapSwipe website.

    Produces, under ``{DATA_PATH}/api/website-data``:
      * ``project-history.zip`` — archive of ``{DATA_PATH}/api/history/``
      * ``overall-endpoints.csv`` — manifest of every file under
        ``{DATA_PATH}/api/`` together with its size in bytes
      * a sidecar ``.md5`` checksum file next to each tracked artifact
        (the two project geojson files, the zip, and the manifest)
    """
    website_data_dest = f"{DATA_PATH}/api/website-data"
    # The destination directory may not exist on a fresh deployment;
    # creating it up front prevents FileNotFoundError in the helpers below.
    os.makedirs(website_data_dest, exist_ok=True)

    # TODO: Move to utils
    def _compute_md5(file_name):
        """Return the hex MD5 digest of *file_name*, read in 4 KiB chunks."""
        hash_md5 = hashlib.md5()
        with open(file_name, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def _project_history_zip():
        """Zip the history endpoint directory; return the created zip's path."""
        project_history_file = f"{website_data_dest}/project-history"
        zip_file_name = shutil.make_archive(
            project_history_file,
            "zip",
            f"{DATA_PATH}/api/history/",
        )
        logger.info("finished generate project-history zip")
        return zip_file_name

    def _manifest_file():
        """Write a CSV manifest of all API endpoint files; return its path."""
        endpoints_dir = f"{DATA_PATH}/api/"
        manifest_file = f"{website_data_dest}/overall-endpoints.csv"
        # newline="" is required by the csv module to avoid blank rows on
        # platforms where the default line translation inserts \r\n.
        with open(manifest_file, "w", newline="") as fp:
            csv_writer = csv.writer(fp)
            csv_writer.writerow(["endpoints", "size_bytes"])
            for path, _, files in os.walk(endpoints_dir):
                for name in files:
                    file_path = os.path.join(path, name)
                    csv_writer.writerow(
                        [
                            # Re-root the absolute path as a site-relative
                            # "/api/..." endpoint path.
                            "/api/" + file_path.split("/api/")[1],
                            os.path.getsize(file_path),
                        ]
                    )
        logger.info("finished generate endpoints manifest for existing stats")
        return manifest_file

    def _generate_file_hash(files):
        """Write a sidecar ``<file>.md5`` checksum for every file in *files*."""
        for file in files:
            md5_hash = _compute_md5(file)
            with open(f"{file}.md5", "w") as fp:
                fp.write(md5_hash)

    files_to_track_for_checksum = [
        f"{DATA_PATH}/api/projects/projects_centroid.geojson",
        f"{DATA_PATH}/api/projects/projects_geom.geojson",
    ]
    files_to_track_for_checksum.extend([_project_history_zip(), _manifest_file()])
    _generate_file_hash(files_to_track_for_checksum)
| 67 | + |
| 68 | + |
9 | 69 | def get_recent_projects(hours: int = 3): |
10 | 70 | """Get ids for projects when results have been submitted within the last x hours.""" |
11 | 71 | pg_db = auth.postgresDB() |
@@ -108,6 +168,7 @@ def generate_stats(project_id_list: Optional[List[str]] = None): |
108 | 168 | overall_stats.get_overall_stats(projects_df, overall_stats_filename) |
109 | 169 |
|
110 | 170 | logger.info(f"finished generate stats for: {project_id_list}") |
| 171 | + generate_data_for_mapswipe_website() |
111 | 172 |
|
112 | 173 |
|
113 | 174 | def generate_stats_all_projects(): |
|
0 commit comments