-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimportEPI.R
More file actions
112 lines (77 loc) · 3.81 KB
/
importEPI.R
File metadata and controls
112 lines (77 loc) · 3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
################################################################################
################################################################################
# Data preparation: Environmental Performance Index (EPI)
require("reshape2")
## Import EPI data
# Import the 2020 Environmental Performance Index (EPI) data, add the region
# of each country and the World Bank GDP for 2019, and return the result in
# wide and in long form.
#
# All input files live in the directory "2020" (relative to the working
# directory). Files that are absent are downloaded first, which needs network
# access. Nothing is written to the global environment.
#
# Returns a named list with two data.frames:
#   epi       wide form: the id columns (names without a dot) plus every
#             ".new" column that is not a rank column.
#   epi.long  long form of `epi` with the columns "EPI.new" (abbreviation)
#             and "EPI.new.value", merged with the first three columns of the
#             indicator table.
importEPI <- function() {
  data_dir <- "2020"
  epi_url <- "https://epi.yale.edu/downloads"
  epi_files <- c("epi2020results20200604.csv",
                 "epi2020indicatortla20200604.csv",
                 "epi2020countryattributes20200604.csv",
                 "epi2020variableattributes20200604.csv")
  gdp_pinned <- file.path(data_dir,
                          "API_NY.GDP.MKTP.CD_DS2_en_csv_v2_2017804.csv")
  gdp_pattern <- "^API_NY\\.GDP\\.MKTP\\.CD_DS2_en_csv_v2_.*\\.csv$"
  gdp_url <- paste0("http://api.worldbank.org/v2/en/indicator/",
                    "NY.GDP.MKTP.CD?downloadformat=csv")

  # Download one file in binary mode (keeps csv and zip byte-identical on
  # every platform) and fail loudly instead of continuing with a broken file.
  fetch <- function(url, destfile) {
    if (!dir.exists(data_dir)) {
      dir.create(data_dir)
    }
    status <- utils::download.file(url, destfile, mode = "wb")
    if (status != 0) {
      stop("Download failed: ", url, call. = FALSE)
    }
    invisible(destfile)
  }

  # Path of the GDP csv: the pinned release if present, otherwise the first
  # file in `data_dir` matching the same naming scheme, otherwise NA.
  locate_gdp <- function() {
    if (file.exists(gdp_pinned)) {
      return(gdp_pinned)
    }
    hits <- list.files(data_dir, pattern = gdp_pattern, full.names = TRUE)
    if (length(hits) > 0) hits[1] else NA_character_
  }

  ## EPI files: fetch only those that are actually absent
  absent <- epi_files[!file.exists(file.path(data_dir, epi_files))]
  if (length(absent) > 0) {
    message("Files absent, downloading them now:")
    for (f in absent) {
      fetch(paste(epi_url, f, sep = "/"), file.path(data_dir, f))
    }
  }

  # Read all EPI csv files; list the directory once so that the tables and
  # their names cannot get out of step.
  epi_paths <- list.files(data_dir, pattern = utils::glob2rx("epi2020*.csv"),
                          full.names = TRUE)
  tables <- lapply(epi_paths, utils::read.table,
                   sep = ",", header = TRUE, quote = "")
  # Name each table by the informative part of its file name, e.g.
  # "epi2020results20200604.csv" -> "results".
  names(tables) <- sub("\\d{8}\\.csv$", "",
                       sub("^epi2020", "", basename(epi_paths)))
  needed <- c("results", "countryattributes", "indicatortla")
  lacking <- setdiff(needed, names(tables))
  if (length(lacking) > 0) {
    stop("EPI table(s) not found in '", data_dir, "': ",
         paste(lacking, collapse = ", "), call. = FALSE)
  }

  # Add regions (columns 3 and 4 of the country attributes, joined on
  # "country").
  epi <- merge(tables[["results"]], tables[["countryattributes"]][, 3:4],
               by = "country")

  ## Add GDP from worldbank
  gdp_file <- locate_gdp()
  if (is.na(gdp_file)) {
    message("File absent, downloading it now:")
    zip_path <- file.path(data_dir, "wb_gdp.zip")
    fetch(gdp_url, zip_path)
    utils::unzip(zip_path, exdir = data_dir)
    unlink(zip_path)
    # NOTE(review): the release suffix of the csv inside the World Bank zip
    # presumably changes between downloads, hence the lookup by pattern
    # rather than by the pinned name - confirm against a fresh download.
    gdp_file <- locate_gdp()
    if (is.na(gdp_file)) {
      stop("No World Bank GDP csv found in '", data_dir,
           "' after download.", call. = FALSE)
    }
  }
  gdp <- utils::read.table(gdp_file, header = TRUE, sep = ",", skip = 4)
  gdp <- gdp[, c("Country.Name", "Country.Code", "X2019")]
  names(gdp)[names(gdp) == "X2019"] <- "GDP2019"

  ## Match country iso codes and add GDP to EPI data.frame
  # setdiff(epi$iso, gdp$Country.Code) # Taiwan is missing in world bank data
  epi <- merge(epi, gdp, by.x = "iso", by.y = "Country.Code")

  # Subset by selecting columns which match "new" (for the 2020 EPI values) and
  # a negative lookbehind to exclude columns containing the new ranks. Also
  # keep every column whose name contains no dot (the id columns).
  keep <- grepl("(?<!rnk).new|^[^.]*$", names(epi), perl = TRUE)
  epi <- epi[, keep, drop = FALSE]

  # Convert into long form
  epi_long <- reshape2::melt(
    epi,
    value.name = "EPI.new.value", variable.name = "EPI.new",
    id.vars = c("code", "iso", "country", "region", "GDP2019")
  )
  # Convert to factor
  epi_long$country <- as.factor(epi_long$country)
  epi_long$region <- as.factor(epi_long$region)
  ## Remove ".new" from EPIs by keeping the first three characters
  epi_long$EPI.new <- as.factor(substr(epi_long$EPI.new, 1, 3))
  # Print a compact overview of the long table to the console
  str(epi_long)

  ## Add EPI levels
  epi_long <- merge(epi_long, tables[["indicatortla"]][, 1:3],
                    by.x = "EPI.new", by.y = "Abbreviation")

  list(epi = epi, epi.long = epi_long)
}
################################################################################
################################################################################