Skip to content

Commit 5322364

Browse files
la55u authored and pgaref committed
implemented fake-useragent package (#49)
* Introduced fake-useragent package * Use fallback Agent in case online-db is not available * Proper UserAgent logging The default way the program gets user agents is now from an online, up-to-date database with the help of the fake-useragent package. Reading user agents from a custom local file is still available as a parameter to the UserAgentManager class. Solves #28.
1 parent fac206a commit 5322364

5 files changed

Lines changed: 55 additions & 24 deletions

File tree

http_request_randomizer/requests/proxy/requestProxy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@
3131

3232
class RequestProxy:
3333
def __init__(self, web_proxy_list=[], sustain=False, timeout=5, protocol=Protocol.HTTP):
34-
self.userAgent = UserAgentManager()
3534
self.logger = logging.getLogger()
3635
self.logger.addHandler(handler)
3736
self.logger.setLevel(0)
37+
self.userAgent = UserAgentManager()
3838

3939
#####
4040
# Each of the classes below implements a specific URL Parser
Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
11
import os
22
import random
3+
from fake_useragent import FakeUserAgent
4+
import logging
35

6+
logger = logging.getLogger(__name__)
47

58
class UserAgentManager:
6-
def __init__(self, agent_file=os.path.join(os.path.dirname(__file__), '../data/user_agents.txt')):
7-
self.agent_file = agent_file
8-
self.useragents = self.load_user_agents(self.agent_file)
9+
def __init__(self, fallback=None, file=None):
10+
self.agent_file = file
11+
if file is not None:
12+
logger.info('Using local file for user agents: '+self.agent_file)
13+
self.useragents = self.load_user_agents(self.agent_file)
14+
else:
15+
logger.info('Using fake-useragent package for user agents.')
16+
if fallback is None:
17+
fallback = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
18+
self.fakeuseragent = FakeUserAgent(fallback=fallback, cache=False)
919

1020
def load_user_agents(self, useragentsfile):
1121
"""
12-
useragentfile : string
22+
useragentsfile : string
1323
path to text file of user agents, one per line
1424
"""
1525
useragents = []
@@ -20,28 +30,41 @@ def load_user_agents(self, useragentsfile):
2030
return useragents
2131

2232
def get_random_user_agent(self):
23-
"""
24-
useragents : string array of different user agents
25-
:param useragents:
26-
:return random agent:
27-
"""
28-
user_agent = random.choice(self.useragents)
29-
return user_agent.decode('utf-8')
33+
if self.agent_file:
34+
user_agent = random.choice(self.useragents)
35+
return user_agent.decode('utf-8')
36+
else:
37+
return self.fakeuseragent.random
3038

3139
def get_first_user_agent(self):
32-
return self.useragents[0].decode('utf-8')
40+
if self.agent_file:
41+
return self.useragents[0].decode('utf-8')
42+
else:
43+
logger.warning('Fake-useragent library does not support operaration get_first - change to user-agent file!')
44+
return None
3345

3446
def get_last_user_agent(self):
35-
return self.useragents[-1].decode('utf-8')
47+
if self.agent_file:
48+
return self.useragents[-1].decode('utf-8')
49+
else:
50+
logger.warning('Fake-useragent library does not support operaration get_last - change to user-agent file!')
51+
return None
3652

3753
def get_len_user_agent(self):
38-
return len(self.useragents)
54+
if self.agent_file:
55+
return len(self.useragents)
56+
else:
57+
logger.warning('Fake-useragent library does not support operaration get_len - change to user-agent file!')
58+
return None
3959

4060

4161
if __name__ == '__main__':
4262
ua = UserAgentManager()
43-
print("Number of User Agent headers: {0}".format(ua.get_len_user_agent()))
44-
print("First User Agent in file: {0}".format(ua.get_first_user_agent()))
45-
print("Last User Agent in file: {0}".format(ua.get_last_user_agent()))
63+
if ua.agent_file:
64+
print("Number of User Agent headers: {0}".format(ua.get_len_user_agent()))
65+
print("First User Agent in file: {0}".format(ua.get_first_user_agent()))
66+
print("Last User Agent in file: {0}".format(ua.get_last_user_agent()))
67+
else:
68+
print("Using up-to-date user agents from online databse.")
4669
print("If you want one random header for a request, you may use the following header:\n")
4770
print("User-Agent: " + ua.get_random_user_agent() + "\n")

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ pytest-cov >= 2.5.1
77
python-dateutil >= 2.6.1
88
requests >= 2.18.4
99
pyOpenSSL >= 17.5.0
10+
fake-useragent >= 0.1.10

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ def run_tests(self):
6666
'psutil >= 5.4.3',
6767
'python-dateutil >= 2.6.1',
6868
'requests >= 2.18.4',
69-
'pyOpenSSL >= 17.5.0'
69+
'pyOpenSSL >= 17.5.0',
70+
'fake-useragent >= 0.1.10'
7071
],
7172
use_scm_version=True,
7273
setup_requires=['setuptools-scm', 'pytest-runner'],

tests/test_useragent.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,27 @@
1313

1414
class TestBaseProxyParsers(unittest.TestCase):
1515
def setUp(self):
16-
self.ua = UserAgentManager()
16+
agentsfile = os.path.join(os.path.dirname(__file__), '../http_request_randomizer/requests/data/user_agents.txt')
17+
self.uafile = UserAgentManager(file=agentsfile)
18+
self.uafake = UserAgentManager()
1719

1820
def test_agent_size(self):
19-
self.assertTrue(self.ua.get_len_user_agent() >= 899)
21+
self.assertTrue(self.uafile.get_len_user_agent() >= 899)
22+
self.assertIsNone(self.uafake.get_len_user_agent())
2023

2124
def test_fist_user_agent(self):
2225
expected = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0"
23-
self.assertEquals(self.ua.get_first_user_agent(), expected)
26+
self.assertEqual(self.uafile.get_first_user_agent(), expected)
27+
self.assertIsNone(self.uafake.get_first_user_agent())
2428

2529
def test_last_user_agent(self):
2630
expected = "Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.2.15 Version/10.0"
27-
self.assertEquals(self.ua.get_last_user_agent(), expected)
31+
self.assertEqual(self.uafile.get_last_user_agent(), expected)
32+
self.assertIsNone(self.uafake.get_last_user_agent())
2833

2934
def test_random_user_agent(self):
30-
self.assertNotEqual(self.ua.get_random_user_agent(), self.ua.get_random_user_agent())
35+
self.assertNotEqual(self.uafile.get_random_user_agent(), self.uafile.get_random_user_agent())
36+
self.assertNotEqual(self.uafake.get_random_user_agent(), self.uafake.get_random_user_agent())
3137

3238

3339
if __name__ == '__main__':

0 commit comments

Comments
 (0)