-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathload_csv_to_mysql.py
More file actions
69 lines (54 loc) · 1.92 KB
/
load_csv_to_mysql.py
File metadata and controls
69 lines (54 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
###################################################
# purpose: load all metadata to MySQL db
# author: rich thomas
# date: 30/10/2023
##################################################
import pandas as pd
import os
import sys
import dataIO as io
from sqlalchemy import create_engine
import pymysql
import glob
# connection
def connect():
# need to swap password for local var
cnxn = create_engine('mysql+pymysql://bq21582:password_password@127.0.0.1:3306/ukllc')
return(cnxn)
# make table
def make_table(df, table_name):
# make table
df.to_sql(table_name, cnxn, if_exists = 'replace', index = False)
# test connection
cnxn = connect()
#t2 = pd.read_sql('select * from t1', cnxn)
#
os.chdir('L:\\Data\\Dev\\programs\\web app')
# data request form load
study_drf = io.load_study_request()
# and create table
make_table(study_drf, 'drf_lps')
# data request form load
nhs_drf = io.load_linked_request()
# need to adjust/trim columns names
nhs_drf = nhs_drf.rename (columns={
'Number of Participants Included (n=) \n(i.e. number of particpants with non-null data, and with UK LLC, and linkage permission)': 'Number of Participants Included',
'Health Domain Groupings (i.e. covid infection, asthma, smoking, etc.) ' : 'Health Domain Groupings'
})
# and strip whitespace
nhs_drf = nhs_drf.rename(columns = lambda x: x.strip())
# get rid of any NULL block names
nhs_drf = nhs_drf.dropna(subset = ['Block Name'])
# and create table
make_table(nhs_drf, 'drf_nhs')
# study info and links
study_info = io.load_study_info_and_links()
make_table(study_info, 'study_info')
# load all metadata
all_files = glob.glob('L:\\Data\\Dev\\programs\\web app')
for root, dirs, files in os.walk('metadata'):
for name in files:
fpath = os.path.join(root, name)
data = pd.read_csv(fpath)
tab_name = "metadata_"+root.split('\\')[1].lower() + '_' + name.split('.')[0].lower()
make_table(data, tab_name)