|
| 1 | +# QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ |
| 2 | +# QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ |
| 3 | +# QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ |
| 4 | +# QQQQQQQQQQQQQQQQQQQWQQQQQWWWBBBHHHHHHHHHBWWWQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ |
| 5 | +# QQQQQQQQQQQQQQQD!`__ssaaaaaaaaaass_ass_s____. -~""??9VWQQQQQQQQQQQQQQQQQQQ |
| 6 | +# QQQQQQQQQQQQQP'_wmQQQWWBWV?GwwwmmWQmwwwwwgmZUVVHAqwaaaac,"?9$QQQQQQQQQQQQQQ |
| 7 | +# QQQQQQQQQQQW! aQWQQQQW?qw#TTSgwawwggywawwpY?T?TYTYTXmwwgZ$ma/-?4QQQQQQQQQQQ |
| 8 | +# QQQQQQQQQQW' jQQQQWTqwDYauT9mmwwawww?WWWWQQQQQ@TT?TVTT9HQQQQQQw,-4QQQQQQQQQ |
| 9 | +# QQQQQQQQQQ[ jQQQQQyWVw2$wWWQQQWWQWWWW7WQQQQQQQQPWWQQQWQQw7WQQQWWc)WWQQQQQQQ |
| 10 | +# QQQQQQQQQf jQQQQQWWmWmmQWU???????9WWQmWQQQQQQQWjWQQQQQQQWQmQQQQWL 4QQQQQQQQ |
| 11 | +# QQQQQQQP'.yQQQQQQQQQQQP" <wa,.!4WQQQQQQQWdWP??!"??4WWQQQWQQc ?QWQQQQQ |
| 12 | +# QQQQQP'_a.<aamQQQW!<yF "!` .. "??$Qa "WQQQWTVP' "??' =QQmWWV?46/ ?QQQQQ |
| 13 | +# QQQP'sdyWQP?!`.-"?46mQQQQQQT!mQQgaa. <wWQQWQaa _aawmWWQQQQQQQQQWP4a7g -WWQQ |
| 14 | +# QQ[ j@mQP'adQQP4ga, -????" <jQQQQQWQQQQQQQQQWW;)WQWWWW9QQP?"` -?QzQ7L ]QQQ |
| 15 | +# QW jQkQ@ jWQQD'-?$QQQQQQQQQQQQQQQQQWWQWQQQWQQQc "4QQQQa .QP4QQQQfWkl jQQQ |
| 16 | +# QE ]QkQk $D?` waa "?9WWQQQP??T?47`_aamQQQQQQWWQw,-?QWWQQQQQ`"QQQD\Qf(.QWQQ |
| 17 | +# QQ,-Qm4Q/-QmQ6 "WWQma/ "??QQQQQQL 4W"- -?$QQQQWP`s,awT$QQQ@ "QW@?$:.yQQQQ |
| 18 | +# QQm/-4wTQgQWQQ, ?4WWk 4waac -???$waQQQQQQQQF??'<mWWWWWQW?^ ` ]6QQ' yQQQQQ |
| 19 | +# QQQQw,-?QmWQQQQw a, ?QWWQQQw _. "????9VWaamQWV???" a j/ ]QQf jQQQQQQ |
| 20 | +# QQQQQQw,"4QQQQQQm,-$Qa ???4F jQQQQQwc <aaas _aaaaa 4QW ]E )WQ`=QQQQQQQ |
| 21 | +# QQQQQQWQ/ $QQQQQQQa ?H ]Wwa, ???9WWWh dQWWW,=QWWU? ?! )WQ ]QQQQQQQ |
| 22 | +# QQQQQQQQQc-QWQQQQQW6, QWQWQQQk <c jWQ ]QQQQQQQ |
| 23 | +# QQQQQQQQQQ,"$WQQWQQQQg,."?QQQQ'.mQQQmaa,., . .; QWQ.]QQQQQQQ |
| 24 | +# QQQQQQQQQWQa ?$WQQWQQQQQa,."?( mQQQQQQW[:QQQQm[ ammF jy! j( } jQQQ(:QQQQQQQ |
| 25 | +# QQQQQQQQQQWWma "9gw?9gdB?QQwa, -??T$WQQ;:QQQWQ ]WWD _Qf +?! _jQQQWf QQQQQQQ |
| 26 | +# QQQQQQQQQQQQQQQws "Tqau?9maZ?WQmaas,, --~-- --- . _ssawmQQQQQQk 3QQQQWQ |
| 27 | +# QQQQQQQQQQQQQQQQWQga,-?9mwad?1wdT9WQQQQQWVVTTYY?YTVWQQQQWWD5mQQPQQQ ]QQQQQQ |
| 28 | +# QQQQQQQWQQQQQQQQQQQWQQwa,-??$QwadV}<wBHHVHWWBHHUWWBVTTTV5awBQQD6QQQ ]QQQQQQ |
| 29 | +# QQQQQQQQQQQQQQQQQQQQQQWWQQga,-"9$WQQmmwwmBUUHTTVWBWQQQQWVT?96aQWQQQ ]QQQQQQ |
| 30 | +# QQQQQQQQQQWQQQQWQQQQQQQQQQQWQQma,-?9$QQWWQQQQQQQWmQmmmmmQWQQQQWQQW(.yQQQQQW |
| 31 | +# QQQQQQQQQQQQQWQQQQQQWQQQQQQQQQQQQQga%,. -??9$QQQQQQQQQQQWQQWQQV? sWQQQQQQQ |
| 32 | +# QQQQQQQQQWQQQQQQQQQQQQQQWQQQQQQQQQQQWQQQQmywaa,;~^"!???????!^`_saQWWQQQQQQQ |
| 33 | +# QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQWWWWQQQQQmwywwwwwwmQQWQQQQQQQQQQQ |
| 34 | +# QQQQQQQWQQQWQQQQQQWQQQWQQQQQWQQQQQQQQQQQQQQQQWQQQQQWQQQWWWQQQQQQQQQQQQQQQWQ |
| 35 | +# credit to luiscoms for this beautiful ascii art |
| 36 | +# https://gist.github.com/luiscoms/f3703016ee218fd5283b |
| 37 | + |
| 38 | +from bs4 import BeautifulSoup |
| 39 | +from selenium import webdriver |
| 40 | +from selenium.webdriver import Firefox |
| 41 | +from selenium.webdriver.common.by import By |
| 42 | +from selenium.webdriver.support.ui import WebDriverWait |
| 43 | +from selenium.webdriver.support import expected_conditions as EC |
| 44 | + |
| 45 | +import json, re, sys |
| 46 | + |
# Fantasy Pros ranking pages, one cheatsheet page per scoring format.
BASE_URL = "https://www.fantasypros.com/nfl/rankings/"

STD_URL = "consensus-cheatsheets.php"
HALF_PPR_URL = "half-point-ppr-cheatsheets.php"
PPR_URL = "ppr-cheatsheets.php"

# Maps each supported scoring-format name to its cheatsheet page suffix;
# append to BASE_URL to get the full ranking URL.
FORMATS = {
    "standard": STD_URL,
    "half_ppr": HALF_PPR_URL,
    "ppr": PPR_URL,
}
| 58 | + |
# Given a URL, use Selenium to open a web browser, fiddle with the page to cause
# the JavaScript to run (and populate the table), then scrape the html
def scrape_html(url):
    """Fetch *url* in Firefox and return the fully rendered page source.

    Scrolls the page and then blocks (up to 5 seconds) until the element
    with id="ranking-table" is present, so the JS-populated ranking table
    is included in the returned html.

    Raises selenium.common.exceptions.TimeoutException if the table never
    appears; the browser is always closed, even on failure.
    """
    driver = Firefox(executable_path='./geckodriver.exe')
    try:
        driver.set_window_size(600, 400)
        driver.get(url)
        driver.execute_script("window.scrollTo(0, 420)")  # blaze it
        # Wait for the JS to populate the table before grabbing the source.
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, "ranking-table"))
        )
        return driver.page_source
    finally:
        # The original only quit on the happy path, leaking a Firefox
        # process whenever the page load or the 5s wait failed.
        driver.quit()
| 72 | + |
# Split a combined "Name (TEAM)" string into its two parts.
# e.g., "Christian McCaffrey (CAR)" -> ("Christian McCaffrey", "CAR")
def parse_name_and_team(name_and_team):
    """Return (name, team) parsed from a "Name (TEAM)" string."""
    match = re.match(r'(.+?)\(([A-Z]+)\)', name_and_team)
    name = match.group(1).strip()
    team = match.group(2)
    return name, team
| 79 | + |
# Strip the numeric rank from a positional ranking, leaving just the
# position acronym. e.g., RB1 -> RB
def parse_position(positional_rank):
    """Return the position acronym from a "<POS><rank>" string."""
    return re.match(r'([A-Z]+?)\d+', positional_rank).group(1)
| 85 | + |
# Given the html data from the Fantasy Pros ranking table, parse the information
# and return a list of player dicts, each with keys: tier, rank,
# positional_rank, name, team, position, bye_week.
def parse_player_data(html):
    soup = BeautifulSoup(html, 'html.parser')
    rows = soup.find("table", id="ranking-table").find_all("tr")
    rows.pop(0)  # drop the header row

    players = []
    current_tier = 0
    for row in rows:
        # Tier separator rows contain text like "Tier 3"; remember the tier
        # and apply it to every player row that follows.
        m = re.match(r'Tier\s(\d+)', row.text)
        if m:
            # int() keeps 'tier' consistently numeric -- the original mixed
            # the int 0 default with str values taken straight from the regex.
            current_tier = int(m.group(1))
            continue
        # PLAYER ROW cell layout:
        # [0] is overall ranking e.g., 1
        # [2] is name + (Team) e.g., Christian McCaffrey (CAR)
        # [3] is position+rank e.g., RB1
        # [4] is bye week e.g., 13
        data = row.find_all("td")
        if len(data) < 5:
            # Skip ad/spacer rows that lack player cells instead of
            # crashing with an IndexError on data[4].
            continue
        player = {}
        player['tier'] = current_tier
        player['rank'] = data[0].text
        player['positional_rank'] = data[3].text
        name, team = parse_name_and_team(data[2].text)
        player['name'] = name
        player['team'] = team
        player['position'] = parse_position(data[3].text)
        player['bye_week'] = data[4].text
        players.append(player)
    return players
| 117 | + |
if __name__ == "__main__":
    # Scrape each scoring format and dump its rankings to its own json file.
    for fmt, url_suffix in FORMATS.items():
        rankings = parse_player_data(scrape_html(BASE_URL + url_suffix))
        output = {'format': fmt, 'rankings': rankings}
        with open('rankings_{}.json'.format(fmt), 'w') as outfile:
            json.dump(output, outfile)
0 commit comments