@@ -34,40 +34,61 @@ def check_file_type(file_path, file_types):
3434 return False
3535
3636
def include_file(file_path, white_list_patterns=None, include_patterns=None):
    """
    Check a file path for inclusion based on OR regular expressions.
    The user is currently not notified if a file is marked for removal.

    A file is included when it matches at least one include pattern (if any
    are given) and matches none of the white list patterns (if any are
    given). When neither list is provided, every file is included.

    Args:
      - file_path (str) : a file path to check if should be included.
      - white_list_patterns (list) : list of patterns to whitelist (matching
                                     files are skipped, i.e. not tested).
      - include_patterns (list) : list of patterns to include.

    Returns:
      (bool) True if the file should be included, False otherwise.
    """
    include_patterns = include_patterns or []
    white_list_patterns = white_list_patterns or []

    # Whitelisting takes preference: a whitelisted file is never included,
    # regardless of whether it also matches an include pattern.
    if white_list_patterns:
        whitelist_regexp = "(%s)" % "|".join(white_list_patterns)
        if re.search(whitelist_regexp, file_path):
            return False

    # With include patterns defined, the file must match at least one.
    # bool() so callers always get a real boolean, not a Match object or None.
    if include_patterns:
        include_regexp = "(%s)" % "|".join(include_patterns)
        return bool(re.search(include_regexp, file_path))

    # Not whitelisted and no include filter: the file is included.
    return True
5673
5774
58- def get_file_paths (base_path , file_types , white_listed_files = None ):
75+ def get_file_paths (
76+ base_path , file_types , white_listed_files = None , include_patterns = None
77+ ):
5978 """
6079 Get path to all files under a given directory and its subfolders.
6180
6281 Args:
6382 - base_path (str) : base path.
6483 - file_types (list) : list of file extensions to accept.
84+ - include_patterns (list) : list of files and patterns to include.
6585 - white_listed_files (list) : list of files or patterns to white list
6686
6787 Returns:
6888 (list) list of file paths.
6989 """
7090 white_listed_files = white_listed_files or []
91+ include_patterns = include_patterns or []
7192
7293 # init paths
7394 file_paths = []
@@ -79,7 +100,9 @@ def get_file_paths(base_path, file_types, white_listed_files=None):
79100 for file in files
80101 if os .path .isfile (os .path .join (root , file ))
81102 and check_file_type (file , file_types )
82- and include_file (os .path .join (root , file ), white_listed_files )
103+ and include_file (
104+ os .path .join (root , file ), white_listed_files , include_patterns
105+ )
83106 ]
84107 return file_paths
85108
@@ -145,7 +168,7 @@ def save_results(check_results, file_path, sep=",", header=None):
145168 dirname = os .path .dirname (file_path )
146169
147170 if not os .path .exists (dirname ):
148- sys .exit ("%s does not exist, cannot save %s there." % (dirname , file_path ))
171+ sys .exit ("%s does not exist, cannot save %s there." % (dirname , file_path ))
149172
150173 # Ensure the header is provided and correct (length 2)
151174 if not header :
@@ -157,10 +180,10 @@ def save_results(check_results, file_path, sep=",", header=None):
157180 print ("Saving results to %s" % file_path )
158181
159182 # Write to file after header row
160- with open (file_path , mode = 'w' ) as fd :
183+ with open (file_path , mode = "w" ) as fd :
161184 writer = csv .writer (fd , delimiter = sep , quotechar = '"' , quoting = csv .QUOTE_MINIMAL )
162185 writer .writerow (header )
163186 for result , items in check_results .items ():
164- [writer .writerow ([item , result ]) for item in items ];
187+ [writer .writerow ([item , result ]) for item in items ]
165188
166189 return file_path
0 commit comments