-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathworldMap.R
More file actions
143 lines (118 loc) · 4.91 KB
/
worldMap.R
File metadata and controls
143 lines (118 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#Purpose: Plot the world map for the EEGManyPipes sample
#Project: EEGManyPipes
#Paper: Trübutschek, D. et al. EEGManyPipelines: A large-scale, grass-root multi-analyst study of EEG analysis practices in the wild. (2022). doi:10.31222/osf.io/jq342
#Author: Y. Yang, D. Truebutschek, & M. C. Vinding
#Date: 17-10-2022
################################################################################
#Necessary imports
library(ggplot2)
library(ggrepel)
library(tidyverse)
library(rworldmap)
library(RColorBrewer)
library(countrycode)
################################################################################
#Path definitions
# Resolve the directory that holds "final_data.csv" for whoever runs the script.
# Known collaborators are recognised through the USER or USERNAME environment
# variable; everyone else falls back to the folder of the document that is
# currently active in RStudio.
if (Sys.getenv("USER") == "mcvinding") {
  data.path <- "/Users/mcvinding/Documents/EEGManyPipelines/metadata_summary"
} else if (Sys.getenv("USERNAME") == "Mikkel") {
  data.path <- "C:/Users/Mikkel/Documents/EEGManyPipelines/metadata_summary"
} else if (Sys.getenv("USER") == "darinka") {
  data.path <- "/home/darinka/Documents/EEGManyPipes/metadata_summary/data"
} else if (Sys.getenv("USERNAME") == "darinka.truebutschek") {
  data.path <- "C:/Users/darinka.truebutschek/Documents/EEGManyPipelines/metadata_summary/data"
} else {
  # Paths and file for Yu-Fang: use the folder of the active RStudio document.
  # data.path must be assigned in this branch too; otherwise the setwd() call
  # below stops with "object 'data.path' not found". The workspace is left
  # untouched so that running the script never discards the user's objects.
  data.path <- dirname(rstudioapi::getActiveDocumentContext()$path)
}
# Fail early with a readable message when the directory cannot be used
# (e.g. the fallback yields "" because the active document was never saved).
if (!dir.exists(data.path)) {
  stop("Data directory not found: '", data.path, "'", call. = FALSE)
}
setwd(data.path)
################################################################################
#Load data
# Read relative to the working directory that was just set.
data <- read.csv("final_data.csv")
################################################################################
#Check how many within country vs. between country teams there are
# Compare teams whose members all come from one country with teams that span
# several countries. Single-member teams are left out of the comparison.
data[["country"]] <- as.factor(data[["country"]])

# One row per row of `data`, ordered by team, annotated with the number of
# rows that share the same team.
dat2analyze <- data %>%
  arrange(team) %>%
  select(team, country) %>%
  group_by(team) %>%
  mutate(teamSize = as.integer(n())) %>%
  ungroup()

# Keep teams with more than one member; `same` holds the number of distinct
# countries inside each team (1 means every member is from the same country).
dat2analyze_subset <- filter(dat2analyze, teamSize > 1) %>%
  group_by(team) %>%
  mutate(same = n_distinct(country)) %>%
  ungroup()

# Reduce to a single (first) row per team.
dat2analyze_final <- distinct(dat2analyze_subset, team, .keep_all = TRUE)

# Number of within-country teams, then number of between-country teams.
with(dat2analyze_final, sum(same == 1))
with(dat2analyze_final, sum(same > 1))
################################################################################
#Prepare data for plotting
data[["country"]] <- as.factor(data[["country"]])
# Build a two-column table (value1, country) with one row per row of `data`:
# country names are first harmonised to a single spelling, then every row is
# tagged with the number of rows sharing its country.
# NOTE(review): presumably `data` holds one row per analyst, so value1 counts
# analysts rather than teams -- confirm against final_data.csv.
df_frq <- data %>%
  select(country) %>%
  mutate(
    country = case_when(
      country %in% c("Korea", "Republic of Korea") ~ "South Korea",
      country == "UAE" ~ "United Arab Emirates",
      country == "USA" ~ "United States",
      TRUE ~ as.character(country)
    )
  ) %>%
  group_by(country) %>%
  # Stored as double rather than integer.
  mutate(value1 = as.numeric(n())) %>%
  arrange(value1) %>%
  select(value1, country) %>%
  ungroup()
# Quick visual check of the first rows.
head(df_frq)
#Grouping data
# Collapse the per-row table to one row per (value1, country) combination and
# bin the raw count into eight ordinal categories used for colouring the map:
#   '1' = 1, '2' = 2, '3' = 3, '4' = 4-5, '5' = 6-10,
#   '6' = 11-20, '7' = 21-40, '8' = more than 40.
# (37 countries according to the original author's note.)
df_country1 <- unique(df_frq) %>%
  mutate(
    value = case_when(
      value1 == 1 ~ "1",
      value1 == 2 ~ "2",
      value1 == 3 ~ "3",
      value1 >= 4 & value1 <= 5 ~ "4",
      value1 >= 6 & value1 <= 10 ~ "5",
      value1 >= 11 & value1 <= 20 ~ "6",
      value1 >= 21 & value1 <= 40 ~ "7",
      value1 > 40 ~ "8"
    )
  )
# The pipeline above must end here: a trailing pipe would swallow the next
# assignment and abort the script.
df_country <- df_country1
#Change col names
# Columns arrive as (value1, country, value): raw count, name, binned category.
colnames(df_country) <- c("value", "country", "value_recoded")
df_country$country <- as.factor(df_country$country)
df_country$value_recoded <- as.factor(df_country$value_recoded)
################################################################################
#Plot
#Joining the data with a map
# Attach the per-country table to rworldmap's country polygons, matching the
# `country` column against the map's country names.
df_country_map <- joinCountryData2Map(
  df_country,
  joinCode = "NAME",
  nameJoinColumn = "country",
  verbose = TRUE
)
#Remove Antarctica from the world map
df_country_new <- subset(df_country_map, continent != "Antarctica")
#Create colormap
# Eight shades running from light to dark.
YYPalette <- c(
  "#D2E3E1", "#C3D9D7", "#B4D0CE", "#A5C7C4",
  "#96BDBA", "#87B4B0", "#78AAA6", "#69A19C"
)
#Create a map
# The built-in legend is switched off so a custom one can be drawn afterwards.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
mapParams <- mapCountryData(
  df_country_new,
  nameColumnToPlot = "value_recoded",
  catMethod = "categorical",
  missingCountryCol = "darkgray", # alternatives tried: gray(.98), "white"
  addLegend = FALSE,
  lwd = .8,
  borderCol = "white",
  colourPalette = YYPalette
)
# Draw a horizontal box legend at the bottom, re-using the plotting parameters
# returned by mapCountryData().
do.call(
  addMapLegendBoxes,
  c(
    mapParams,
    x = "bottom",
    horiz = TRUE,
    bg = "transparent",
    bty = "n"
  )
)
################################################################################
#Extract percentages of continents
# Share (in percent) of rows of `data` per continent.
analyst_countries <- data[["country"]]
# Translate each country name into its continent.
emp_continent <- countrycode(
  sourcevar = analyst_countries,
  origin = "country.name",
  destination = "continent"
)
# The denominator is the number of entries in emp_continent, including any
# that could not be mapped to a continent.
n_analysts <- length(emp_continent)
freqTable <- (table(emp_continent) / n_analysts) * 100