11import os
22import random
3+ from fake_useragent import FakeUserAgent
4+ import logging
35
6+ logger = logging .getLogger (__name__ )
47
58class UserAgentManager :
6- def __init__ (self , agent_file = os .path .join (os .path .dirname (__file__ ), '../data/user_agents.txt' )):
7- self .agent_file = agent_file
8- self .useragents = self .load_user_agents (self .agent_file )
9+ def __init__ (self , fallback = None , file = None ):
10+ self .agent_file = file
11+ if file is not None :
12+ logger .info ('Using local file for user agents: ' + self .agent_file )
13+ self .useragents = self .load_user_agents (self .agent_file )
14+ else :
15+ logger .info ('Using fake-useragent package for user agents.' )
16+ if fallback is None :
17+ fallback = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
18+ self .fakeuseragent = FakeUserAgent (fallback = fallback , cache = False )
919
1020 def load_user_agents (self , useragentsfile ):
1121 """
12- useragentfile : string
22+ useragentsfile : string
1323 path to text file of user agents, one per line
1424 """
1525 useragents = []
@@ -20,28 +30,41 @@ def load_user_agents(self, useragentsfile):
2030 return useragents
2131
2232 def get_random_user_agent (self ):
23- """
24- useragents : string array of different user agents
25- :param useragents:
26- :return random agent:
27- """
28- user_agent = random .choice (self .useragents )
29- return user_agent .decode ('utf-8' )
33+ if self .agent_file :
34+ user_agent = random .choice (self .useragents )
35+ return user_agent .decode ('utf-8' )
36+ else :
37+ return self .fakeuseragent .random
3038
3139 def get_first_user_agent (self ):
32- return self .useragents [0 ].decode ('utf-8' )
40+ if self .agent_file :
41+ return self .useragents [0 ].decode ('utf-8' )
42+ else :
44+ logger .warning ('Fake-useragent library does not support operation get_first - change to user-agent file!' )
44+ return None
3345
3446 def get_last_user_agent (self ):
35- return self .useragents [- 1 ].decode ('utf-8' )
47+ if self .agent_file :
48+ return self .useragents [- 1 ].decode ('utf-8' )
49+ else :
50+ logger .warning ('Fake-useragent library does not support operation get_last - change to user-agent file!' )
51+ return None
3652
3753 def get_len_user_agent (self ):
38- return len (self .useragents )
54+ if self .agent_file :
55+ return len (self .useragents )
56+ else :
57+ logger .warning ('Fake-useragent library does not support operation get_len - change to user-agent file!' )
58+ return None
3959
4060
4161if __name__ == '__main__' :
4262 ua = UserAgentManager ()
43- print ("Number of User Agent headers: {0}" .format (ua .get_len_user_agent ()))
44- print ("First User Agent in file: {0}" .format (ua .get_first_user_agent ()))
45- print ("Last User Agent in file: {0}" .format (ua .get_last_user_agent ()))
63+ if ua .agent_file :
64+ print ("Number of User Agent headers: {0}" .format (ua .get_len_user_agent ()))
65+ print ("First User Agent in file: {0}" .format (ua .get_first_user_agent ()))
66+ print ("Last User Agent in file: {0}" .format (ua .get_last_user_agent ()))
67+ else :
68+ print ("Using up-to-date user agents from online database." )
4669 print ("If you want one random header for a request, you may use the following header:\n " )
4770 print ("User-Agent: " + ua .get_random_user_agent () + "\n " )
0 commit comments