|
| 1 | +require "csv" |
| 2 | +require "pry" |
| 3 | + |
# Collects one rendered SQL insert statement per table, in the order they
# must be written to the output file.
output = Array.new
| 5 | + |
# A single cell destined for a SQL tuple: the raw CSV value plus a flag saying
# whether it has to be rendered as a quoted text literal.
Q = Struct.new(:raw_value, :quotable) do
  # Returns the value to render; the CSV placeholder 'NA' is translated to
  # the string 'null', everything else is passed through unchanged.
  def value
    return 'null' if raw_value == 'NA'

    raw_value
  end

  # Predicate form of the `quotable` member.
  alias_method :quotable?, :quotable
end
| 15 | + |
# Base class for rows that can be rendered as one tuple of a SQL
# `insert ... values` list.
#
# Subclasses must implement `values`, returning an array of objects that
# respond to `value` (the cell content) and `quotable` (whether the cell is a
# text literal).
class Insertable
  # Returns the row as a parenthesised, comma-separated tuple,
  # e.g. `(1, $$Bob$$, null)`.
  def data
    "(#{quoted_values.join(', ')})"
  end

  # Renders every cell of the row as a SQL literal:
  #   * nil or the string 'null' -> bare `null` (a nil used to produce an
  #     empty slot, i.e. invalid SQL, or an empty string when quotable)
  #   * quotable cells           -> text literal (see #text_literal)
  #   * anything else            -> the value itself, unquoted
  def quoted_values
    values.map do |qv|
      raw = qv.value
      if raw.nil? || raw == 'null'
        'null'
      elsif qv.quotable
        text_literal(raw.to_s)
      else
        raw
      end
    end
  end

  private

  # Dollar-quotes the text as before, unless it contains a `$`: such a value
  # could close the `$$` delimiter early, so it is emitted as a standard
  # single-quoted literal with embedded quotes doubled instead.
  def text_literal(text)
    return "$$#{text}$$" unless text.include?('$')

    "'#{text.gsub("'", "''")}'"
  end
end
| 33 | + |
# One row of the `country` insert: code, name and an optional alternative name.
class Country < Insertable
  attr_reader :code, :name, :alternative_name

  def initialize(code, name, alternative_name)
    @code = code
    @name = name
    @alternative_name = alternative_name
  end

  # Code and name are always text cells; a missing alternative name is
  # rendered as an unquoted null instead.
  def values
    alternative = alternative_name ? Q.new(alternative_name, true) : Q.new("null", false)

    [Q.new(code, true), Q.new(name, true), alternative]
  end
end
| 55 | + |
# One row of the `athlete` insert.
class Athlete < Insertable
  attr_reader :id, :name, :sex, :height, :weight

  def initialize(id, name, sex, height, weight)
    @id, @name, @sex, @height, @weight = id, name, sex, height, weight
  end

  # id, height and weight are emitted unquoted; name and sex as text.
  def values
    [
      Q.new(id, false),
      Q.new(name, true),
      Q.new(sex, true),
      Q.new(height, false),
      Q.new(weight, false)
    ]
  end
end
| 71 | + |
# One row of the `games` insert.
class Games < Insertable
  attr_reader :code, :name, :year, :season, :city

  def initialize(code, name, year, season, city)
    @code, @name, @year, @season, @city = code, name, year, season, city
  end

  # Every column is text except the year, which is emitted unquoted.
  def values
    [
      Q.new(code, true),
      Q.new(name, true),
      Q.new(year, false),
      Q.new(season, true),
      Q.new(city, true)
    ]
  end
end
| 87 | + |
# One row of the `sport` insert: an unquoted id and a text name.
class Sport < Insertable
  attr_reader :id, :name

  def initialize(id, name)
    @id, @name = id, name
  end

  def values
    [
      Q.new(id, false),
      Q.new(name, true)
    ]
  end
end
| 100 | + |
# One row of the `event` insert: unquoted id and sport id, plus a text name.
class Event < Insertable
  attr_reader :id, :sport_id, :name

  def initialize(id, sport_id, name)
    @id, @sport_id, @name = id, sport_id, name
  end

  def values
    [
      Q.new(id, false),
      Q.new(sport_id, false),
      Q.new(name, true)
    ]
  end
end
| 114 | + |
# One row of the `appearance` insert, linking an athlete, country, games and
# event together with the athlete's age and medal.
class Appearance < Insertable
  attr_reader :athlete_id, :country_code, :games_code, :event_id, :age, :medal

  def initialize(athlete_id, country_code, games_code, event_id, age, medal)
    @athlete_id, @country_code, @games_code = athlete_id, country_code, games_code
    @event_id, @age, @medal = event_id, age, medal
  end

  # Ids and age are emitted unquoted; the codes and the medal as text.
  def values
    [
      Q.new(athlete_id, false),
      Q.new(country_code, true),
      Q.new(games_code, true),
      Q.new(event_id, false),
      Q.new(age, false),
      Q.new(medal, true)
    ]
  end
end
| 131 | + |
# Renders each row via its `data` method and joins the results with a comma
# and a newline, giving the body of a multi-row `values` list.
def prepare(values)
  tuples = values.map { |row| row.data }
  tuples.join(",\n")
end
| 135 | + |
# Assembles a complete insert statement: the statement head, the prepared
# values and a terminating semicolon, each on its own line.
def create_insert(statement, values)
  terminator = ";"
  lines = [statement, values, terminator]
  lines.join("\n")
end
| 139 | + |
# Expands the one-letter sex code into its full word.
# Returns nil for anything other than 'M' or 'F'.
def extract_sex(v)
  case v
  when 'M' then 'Male'
  when 'F' then 'Female'
  end
end
| 147 | + |
# Builds the games code from the year and the first character of the season,
# separated by a dash (e.g. 1992 and "Summer" give "1992-S").
def games_code(year, season)
  initial = season.chars.first
  "#{year}-#{initial}"
end
| 151 | + |
# ---------------------------------------------------------------------------
# Script body: read the two raw CSV files, derive one list of rows per table
# and append one insert statement per table to `output`, which is finally
# written to ./dml/insert_data.sql.
# ---------------------------------------------------------------------------

# Every athlete/event row; all tables except `country` are derived from it.
data = CSV.read("./raw/athlete_events.csv", headers: true)

# -- country ----------------------------------------------------------------
countries = CSV
  .read("./raw/noc_regions.csv", headers: true)
  .map { |c| Country.new(c["NOC"], c["region"], c["notes"]) }

output << create_insert(<<~SQL.strip, prepare(countries))
  insert into country (code, name, alternative_name) values
SQL

# -- athlete ----------------------------------------------------------------
# De-duplicate on the full attribute tuple before building the rows.
athletes = data
  .map { |r| [r['ID'], r['Name'], extract_sex(r['Sex']), r['Height'], r['Weight']] }
  .uniq
  .map { |r| Athlete.new(*r) }

output << create_insert(<<~SQL.strip, prepare(athletes))
  insert into athlete (id, name, sex, height, weight) values
SQL

# -- sport ------------------------------------------------------------------
# Ids are assigned sequentially from 1 in order of first appearance.
sport_names = data.map { |r| r['Sport'] }.uniq
sports = sport_names.map.with_index(1) { |name, i| Sport.new(i, name) }

output << create_insert(<<~SQL.strip, prepare(sports))
  insert into sport (id, name) values
SQL

sports_lookup = sports.each_with_object({}) { |s, h| h[s.name] = s.id }

# -- event ------------------------------------------------------------------
# Events are unique per [sport, event name] pair; ids start at 1.
event_pairs = data.map { |r| [r['Sport'], r['Event']] }.uniq

events = event_pairs.map.with_index(1) do |(sport, name), i|
  Event.new(i, sports_lookup[sport], name)
end

output << create_insert(<<~SQL.strip, prepare(events))
  insert into event (id, sport_id, name) values
SQL

# Keyed by the same [sport, event name] pair the events were de-duplicated
# on. Keying by name alone would let two sports sharing an event name
# overwrite each other and attach appearances to the wrong event id.
events_lookup = event_pairs
  .zip(events)
  .each_with_object({}) { |(pair, event), h| h[pair] = event.id }

# -- games ------------------------------------------------------------------
# One row per games code; the first row seen for a code wins.
games = data
  .map { |r| [games_code(r['Year'], r['Season']), [r['Year'], r['City']].join(" - "), r['Year'], r['Season'], r['City']] }
  .uniq { |r| r[0] }
  .map { |r| Games.new(*r) }

output << create_insert(<<~SQL.strip, prepare(games))
  insert into games (code, name, year, season, city) values
SQL

# -- appearance -------------------------------------------------------------
appearances = data
  .map do |r|
    [
      r['ID'],
      r['NOC'],
      games_code(r['Year'], r['Season']),
      events_lookup[[r['Sport'], r['Event']]],
      r['Age'],
      r['Medal']
    ]
  end
  .uniq
  .map { |r| Appearance.new(*r) }

output << create_insert(<<~SQL.strip, prepare(appearances))
  insert into appearance (athlete_id, country_code, games_code, event_id, age, medal) values
SQL

# Statements are separated by a blank line in the generated file.
File.write("./dml/insert_data.sql", output.join("\n\n"))
0 commit comments