-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathloadingADHD_Data.py
More file actions
104 lines (78 loc) · 3.41 KB
/
loadingADHD_Data.py
File metadata and controls
104 lines (78 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 28 20:44:46 2016
@author: and_ma
"""
import pandas as pd
import scipy.io as sio
import os
import re
import matplotlib.pyplot as plt
def load(path = "/Users/and_ma/Documents/DataScience/UB_DataScience/DataScience_Project/Features/"):
    """Load the per-patient feature files found in ``path`` into one dataframe.

    Only files whose name matches ``<word chars><A|B|C>.mat`` are read; the
    letter right before ``.mat`` is the experiment label.  Any other directory
    entry (hidden files, unrelated ``.mat`` files, ...) is skipped, because a
    row without an experiment label would be discarded by the NaN filter
    anyway.

    input: path to the directory holding the ``.mat`` files (a trailing
           separator is optional).
    output: dataframe with one row per patient file and these columns:
            - 56 band power columns named ``<electrode>_(<band>)``,
            - 8 band power ratio columns named ``BPR_<electrode>``,
            - ``patientName``: the file name up to the first ``.mat``,
            - ``experiment``: 'A', 'B' or 'C'.
            Rows containing any NaN are dropped, rows are ordered by
            experiment (file name order inside each experiment) and the index
            is reset to 0..n-1.  When no usable file is found, an empty
            dataframe with the same columns is returned.
    """
    #electrodes names
    electrodes = ['Fp1', 'F3', 'C3', 'Fz', 'Cz', 'Fp2', 'F4', 'C4']
    #bandPower names
    bandPower = ['Theta2+Alpha1', 'Theta', 'Alpha', 'Beta_Global', 'Beta_Alta', 'Beta_Baja', 'Gamma']

    #columns for the band powers: electrode is the outer loop, band the inner one
    #NOTE(review): this assumes the flattened 'BandPower' array is ordered
    #electrode by electrode - confirm against the code that writes the .mat files
    columns_df = [electrode + '_(' + band + ')'
                  for electrode in electrodes
                  for band in bandPower]
    #columns for the band power ratios
    columns_df2 = ['BPR_' + electrode for electrode in electrodes]

    #one pattern for the three experiments; group 1 is the experiment letter
    experimentPattern = re.compile(r'[\w]+([ABC])\.mat')

    #define the list of dataframes
    list_df = []
    #sorted() makes the row order independent of the order os.listdir returns
    for fileName in sorted(os.listdir(path)):
        found = experimentPattern.match(fileName)
        if found is None:
            #not an experiment file: nothing to load
            continue
        #os.path.join works whether or not path ends with a separator
        patient = sio.loadmat(os.path.join(path, fileName))
        bandPowerDF = pd.DataFrame(
            patient['BandPower'].reshape(1, len(columns_df)), columns=columns_df)
        bandPowerRatioDF = pd.DataFrame(
            patient['BandPowerRatio'].reshape(1, len(columns_df2)), columns=columns_df2)
        patientDF = pd.concat([bandPowerDF, bandPowerRatioDF], axis=1)
        patientDF['patientName'] = fileName.split('.mat')[0]
        patientDF['experiment'] = found.group(1)
        list_df.append(patientDF)

    if not list_df:
        #pd.concat refuses an empty list, so build the empty result by hand
        return pd.DataFrame(columns=columns_df + columns_df2 + ['patientName', 'experiment'])

    #via the concat all the dataframes are concatenated
    patientsDF = pd.concat(list_df)
    #nans are dropped
    patientsDF = patientsDF.dropna(how='any')
    #dataframe is sorted by experiment type: order A, B and C
    #(mergesort is stable, so the file name order is kept inside each experiment)
    patientsDF = patientsDF.sort_values(by='experiment', ascending=True, kind='mergesort')
    #indexes are reset
    patientsDF = patientsDF.reset_index(drop=True)
    return patientsDF
def plotCluster_matPlot(df,name_x,name_y,name_cluster, title):
    """Show a 2D scatter plot of two columns of ``df``, split by cluster label.

    Rows whose ``name_cluster`` value equals 0 are drawn as green circles with
    legend label '0'; every other row is drawn as a red triangle with legend
    label '1'.  The groups are picked with a boolean mask, so the split stays
    correct whatever other label values are present (e.g. negative labels).

    input: df - dataframe holding the three columns named below
           name_x - column plotted on the horizontal axis
           name_y - column plotted on the vertical axis
           name_cluster - column holding the cluster label of each row
           title - title of the figure
    output: None; a new 8x8 figure is created and displayed with plt.show().
    """
    x_values = df[name_x].values
    y_values = df[name_y].values
    in_cluster0 = (df[name_cluster] == 0).values

    ### Plot result clustering in two dimensions
    plt.figure(figsize=(8,8))
    # note: rcParams is global, this also affects legends of later figures
    plt.rcParams['legend.fontsize'] = 10
    plt.plot(x_values[in_cluster0], y_values[in_cluster0], 'o', markersize=8, color='green', alpha=0.5, label='0')
    plt.plot(x_values[~in_cluster0], y_values[~in_cluster0], '^', markersize=8, alpha=0.5, color='red', label='1')
    plt.xlabel('PCA Component 1')
    plt.ylabel('PCA Component 2')
    plt.title(title)
    plt.legend(loc='upper right')
    plt.show()
    return