Skip to content

Commit af0d79d

Browse files
committed
Get the importer working and able to build SQL
0 parents  commit af0d79d

13 files changed

Lines changed: 271661 additions & 0 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
olympics-database.sql
2+
dml/*.sql

Makefile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Name of the PostgreSQL database every target operates on.
db=olympics

# None of these targets produce a file named after themselves.
.PHONY: test insert build_inserts dropdb createdb build_types_and_tables build_dump

# Rebuild everything from scratch: recreate the database, regenerate the
# insert script, create the schema and load the data. The steps are listed as
# prerequisites (not as a recipe line) so make runs each target in turn.
test: dropdb createdb build_inserts build_types_and_tables insert

# Load the generated data into the database.
insert:
	psql ${db} < ./dml/insert_data.sql

# extract.rb writes ./dml/insert_data.sql itself, so no stdout redirect is needed.
build_inserts:
	ruby ./build/extract.rb

dropdb:
	dropdb --if-exists ${db}

createdb:
	createdb ${db}

# Order matters: referenced tables must exist before the tables that
# reference them (event -> sport, appearance -> everything else).
build_types_and_tables:
	psql ${db} < ./ddl/country.sql
	psql ${db} < ./ddl/athlete.sql
	psql ${db} < ./ddl/games.sql
	psql ${db} < ./ddl/sport.sql
	psql ${db} < ./ddl/event.sql
	psql ${db} < ./ddl/appearance.sql

# Concatenate schema and data into one distributable SQL file. Files are
# listed explicitly because brace expansion is not available in every /bin/sh.
build_dump:
	cat ddl/country.sql ddl/athlete.sql ddl/sport.sql ddl/games.sql ddl/event.sql ddl/appearance.sql dml/insert_data.sql > olympics-database.sql

build/extract.rb

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
require "csv"
2+
require "pry"
3+
4+
# Collects the generated insert statements; joined and written to disk at the end of the script.
output = []
5+
6+
# A single column value destined for an SQL insert tuple.
#
# raw_value - the value as read from the source data (may be nil).
# quotable  - true when the value must be rendered as a quoted SQL string.
Q = Struct.new(:raw_value, :quotable) do
  # Returns the value to render. The literal string 'NA' and a missing (nil)
  # raw value both become the string 'null' so they are emitted as SQL NULL
  # instead of an empty or malformed tuple slot.
  def value
    return 'null' if raw_value.nil? || raw_value == 'NA'

    raw_value
  end

  # Returns whether the value should be wrapped in SQL string quotes.
  def quotable?
    quotable
  end
end
15+
16+
# Base class for rows that render themselves as one tuple of a multi-row
# SQL `insert ... values` statement.
#
# Subclasses must implement `values`, returning objects that respond to
# `value` and `quotable` (see Q).
class Insertable
  # Returns the row as a parenthesised, comma-separated SQL tuple,
  # e.g. "(1, $$Judo$$)".
  def data
    "(#{quoted_values.join(', ')})"
  end

  # Returns each column rendered for SQL, in `values` order: `null` for
  # missing values, a quoted string literal for quotable values, and the
  # value unchanged otherwise.
  def quoted_values
    values.map do |qv|
      value = qv.value
      if value.nil? || value == 'null'
        # A nil would otherwise render as an empty slot and break the statement.
        'null'
      elsif qv.quotable
        quote_literal(value)
      else
        value
      end
    end
  end

  private

  # Renders text as a PostgreSQL string literal. Dollar quoting is used by
  # default because it needs no escaping for apostrophes; text containing a
  # `$` could terminate (or corrupt the end of) a `$$` literal, so it falls
  # back to a single-quoted literal with doubled apostrophes instead.
  # NOTE(review): the fallback assumes standard_conforming_strings is on
  # (the PostgreSQL default), so backslashes are taken literally.
  def quote_literal(value)
    text = value.to_s
    return %($$#{text}$$) unless text.include?('$')

    "'#{text.gsub("'", "''")}'"
  end
end
33+
34+
# One row of the `country` table: a three-letter code, a name and an
# optional alternative name.
class Country < Insertable
  attr_reader :code, :name, :alternative_name

  def initialize(code, name, alternative_name)
    @code = code
    @name = name
    @alternative_name = alternative_name
  end

  # Column values in (code, name, alternative_name) order. A missing
  # alternative name is emitted as an unquoted null.
  def values
    alternative = alternative_name ? Q.new(alternative_name, true) : Q.new("null", false)

    [Q.new(code, true), Q.new(name, true), alternative]
  end
end
55+
56+
# One row of the `athlete` table.
class Athlete < Insertable
  attr_reader :id, :name, :sex, :height, :weight

  def initialize(id, name, sex, height, weight)
    @id, @name, @sex = id, name, sex
    @height, @weight = height, weight
  end

  # Column values in (id, name, sex, height, weight) order; only name and
  # sex are rendered as quoted strings.
  def values
    [
      Q.new(id, false),
      Q.new(name, true),
      Q.new(sex, true),
      Q.new(height, false),
      Q.new(weight, false)
    ]
  end
end
71+
72+
# One row of the `games` table.
class Games < Insertable
  attr_reader :code, :name, :year, :season, :city

  def initialize(code, name, year, season, city)
    @code, @name = code, name
    @year, @season, @city = year, season, city
  end

  # Column values in (code, name, year, season, city) order; year is the
  # only column rendered without quotes.
  def values
    [
      Q.new(code, true),
      Q.new(name, true),
      Q.new(year, false),
      Q.new(season, true),
      Q.new(city, true)
    ]
  end
end
87+
88+
# One row of the `sport` table.
class Sport < Insertable
  attr_reader :id, :name

  def initialize(id, name)
    @id, @name = id, name
  end

  # Column values in (id, name) order; the id is unquoted, the name quoted.
  def values
    identifier = Q.new(id, false)
    label = Q.new(name, true)

    [identifier, label]
  end
end
100+
101+
# One row of the `event` table, linked to its sport by id.
class Event < Insertable
  attr_reader :id, :sport_id, :name

  def initialize(id, sport_id, name)
    @id, @sport_id, @name = id, sport_id, name
  end

  # Column values in (id, sport_id, name) order; both ids are unquoted,
  # the name is quoted.
  def values
    [
      Q.new(id, false),
      Q.new(sport_id, false),
      Q.new(name, true)
    ]
  end
end
114+
115+
# One row of the `appearance` table: an athlete competing for a country in
# one event of one games, with their age and medal (if any).
class Appearance < Insertable
  attr_reader :athlete_id, :country_code, :games_code, :event_id, :age, :medal

  def initialize(athlete_id, country_code, games_code, event_id, age, medal)
    @athlete_id, @country_code, @games_code = athlete_id, country_code, games_code
    @event_id, @age, @medal = event_id, age, medal
  end

  # Column values in (athlete_id, country_code, games_code, event_id, age,
  # medal) order; the codes and the medal are quoted, the rest are not.
  def values
    [
      Q.new(athlete_id, false),
      Q.new(country_code, true),
      Q.new(games_code, true),
      Q.new(event_id, false),
      Q.new(age, false),
      Q.new(medal, true)
    ]
  end
end
131+
132+
# Renders a list of rows as the body of a multi-row insert: each row's
# `data` tuple on its own line, separated by commas.
def prepare(values)
  tuples = values.map { |row| row.data }
  tuples.join(",\n")
end
135+
136+
# Assembles a complete insert statement: the `insert ... values` header,
# the prepared tuples and a terminating semicolon, one per line.
def create_insert(statement, values)
  terminator = ";"
  parts = [statement, values, terminator]
  parts.join("\n")
end
139+
140+
# Expands a one-letter sex code into the label used by the `sex` enum.
# Returns nil for anything other than 'M' or 'F'.
def extract_sex(v)
  case v
  when 'M' then 'Male'
  when 'F' then 'Female'
  end
end
147+
148+
# Builds the games code: the year, a dash and the first character of the
# season, e.g. "1992-S".
def games_code(year, season)
  initial = season[0]
  "#{year}-#{initial}"
end
151+
152+
# Every athlete/event result row; all tables except `country` are derived from it.
data = CSV.read("./raw/athlete_events.csv", headers: true)

# country: one row per record of the NOC regions file.
countries = CSV
  .read("./raw/noc_regions.csv", headers: true)
  .map { |c| Country.new(c["NOC"], c["region"], c["notes"]) }

output << create_insert("insert into country (code, name, alternative_name) values", prepare(countries))

# athlete: de-duplicate on the ID alone, because `id` is the table's primary
# key; de-duplicating on the whole tuple would emit two rows for one ID if any
# other column differed between result rows.
athletes = data
  .map { |r| [r['ID'], r['Name'], extract_sex(r['Sex']), r['Height'], r['Weight']] }
  .uniq(&:first)
  .map { |r| Athlete.new(*r) }

output << create_insert("insert into athlete (id, name, sex, height, weight) values", prepare(athletes))

# sport: ids are assigned sequentially (from 1) in order of first appearance.
sports = data
  .map { |r| r['Sport'] }
  .uniq
  .map.with_index(1) { |name, id| Sport.new(id, name) }

output << create_insert("insert into sport (id, name) values", prepare(sports))

sports_lookup = sports.each_with_object({}) { |s, h| h[s.name] = s.id }

# event: unique per (sport, event name); ids are assigned sequentially from 1.
event_keys = data.map { |r| [r['Sport'], r['Event']] }.uniq

events = event_keys
  .map.with_index(1) { |(sport, name), id| Event.new(id, sports_lookup[sport], name) }

output << create_insert("insert into event (id, sport_id, name) values", prepare(events))

# Keyed by the same (sport, event name) pair that defines event uniqueness, so
# two sports sharing an event name cannot resolve to the wrong event id.
events_lookup = event_keys
  .zip(events)
  .each_with_object({}) { |(key, event), h| h[key] = event.id }

# games: one row per games code; the first row seen for a code wins.
games = data
  .map { |r| [games_code(r['Year'], r['Season']), [r['Year'], r['City']].join(" - "), r['Year'], r['Season'], r['City']] }
  .uniq { |r| [r[0]] }
  .map { |r| Games.new(*r) }

output << create_insert("insert into games (code, name, year, season, city) values", prepare(games))

# appearance: one row per distinct result tuple.
appearances = data
  .map { |r| [r['ID'], r['NOC'], games_code(r['Year'], r['Season']), events_lookup[[r['Sport'], r['Event']]], r['Age'], r['Medal']] }
  .uniq
  .map { |r| Appearance.new(*r) }

output << create_insert("insert into appearance (athlete_id, country_code, games_code, event_id, age, medal) values", prepare(appearances))

# Statements are separated by a blank line, in dependency order.
File.write("./dml/insert_data.sql", output.join("\n\n"))

ddl/appearance.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- appearance: one athlete competing for one country in one event of one games.
drop table if exists appearance;
-- Drop the enum as well (after the table that uses it) so the script can be
-- re-run; `create type` has no `if not exists` form.
drop type if exists medal;

create type medal as enum ('Gold', 'Silver', 'Bronze');

create table appearance (
  athlete_id integer not null references athlete(id),
  country_code char(3) not null references country(code),
  games_code char(6) not null references games(code),
  event_id integer not null references event(id),
  age integer null,
  medal medal null
);

create unique index appearance_athlete_games_event on appearance(athlete_id, games_code, event_id, medal);
create index appearance_games_athlete on appearance(games_code, athlete_id);

ddl/athlete.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
-- athlete: one row per competitor; height and weight are optional.
drop table if exists athlete;
drop type if exists sex;

create type sex as enum ('Male', 'Female');

create table athlete (
  id     integer,
  name   varchar(128) not null,
  sex    sex not null,
  height integer,
  weight integer,
  primary key (id)
);

create index athlete_name on athlete (name);

ddl/country.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-- country: keyed by a three-character code; both names are optional.
drop table if exists country;

create table country (
  code             char(3),
  name             varchar(32),
  alternative_name varchar(32),
  primary key (code)
);

create index country_name on country (name);

ddl/create_database.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- Recreate the olympics database from scratch: drop it if present, then create it empty.
drop database if exists olympics;
2+
create database olympics;

ddl/event.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- event: a named event belonging to exactly one sport.
drop table if exists event;

create table event (
  id       integer,
  sport_id integer not null,
  name     varchar(128) not null,
  primary key (id),
  foreign key (sport_id) references sport (id)
);

create index event_name on event (name);
create index event_sport_id on event (sport_id);

ddl/games.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
-- games: one edition of the Olympics, keyed by a six-character code.
drop table if exists games;
-- Drop the enum as well (after the table that uses it) so the script can be
-- re-run; `create type` has no `if not exists` form.
drop type if exists season;

create type season as enum ('Summer', 'Winter');

create table games (
  code char(6) primary key,
  name varchar(32) not null,
  year integer not null,
  season season not null,
  city varchar(32) not null
);

create index games_name on games(name);
create index games_city on games(city);
create index games_year on games(year);

ddl/sport.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- sport: lookup table of sport names.
drop table if exists sport;

create table sport (
  id   integer,
  name varchar(32) not null,
  primary key (id)
);

create index sport_name on sport (name);

0 commit comments

Comments
 (0)