@@ -135,6 +135,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
135135
136136 # Skip import statements
137137 if "import" in stripped_line :
138+ line_number += 1
138139 continue
139140
140141 # Skip lines that are individually annotated with HAnS
@@ -144,6 +145,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
144145 if match not in hans_lines_seen : # Count only if not seen before
145146 hans_exclusion_counter [0 ] += 1
146147 hans_lines_seen .add (match )
148+ line_number += 1
147149 continue
148150
149151 # Handle multi-line comments
@@ -152,6 +154,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
152154 if in_multiline_comment :
153155 if multi_line_comment_end_pattern .search (stripped_line ):
154156 in_multiline_comment = False
157+ line_number += 1
155158 continue
156159
157160 # Detect if inside a test class or function
@@ -167,24 +170,27 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
167170 if hans_end_pattern .search (stripped_line ):
168171 in_hans_annotated_block = False
169172 # Preserve the closing annotation
173+ line_number += 1
170174 continue
171175
172176 # Skip lines inside test contexts or inside a HAnS-annotated block
173177 if in_testing_context or in_hans_annotated_block :
178+ line_number += 1
174179 continue
175180
176181 # Skip single-line comments
177182 if single_line_comment_pattern .search (stripped_line ):
183+ line_number += 1
178184 continue
179185
180186 # Search only non-comment, non-test, non-HAnS-annotated lines
181187 keywords_found = {}
182188 for category , subcategory , keyword_regex in flattened_keywords :
183189 if re .search (keyword_regex , stripped_line , re .IGNORECASE ):
184- key = f"{ category } : { subcategory } "
185- if key not in keywords_found :
186- keywords_found [key ] = []
187- keywords_found [key ].append (keyword_regex )
190+ fm_feature = f"{ category } : { subcategory } "
191+ if fm_feature not in keywords_found :
192+ keywords_found [fm_feature ] = []
193+ keywords_found [fm_feature ].append (keyword_regex )
188194
189195 if keywords_found :
190196 if line_number not in matches :
@@ -194,13 +200,13 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
194200 "Keywords Found" : {}
195201 }
196202 # Merge all found keywords for the same category and subcategory
197- for key , keywords in keywords_found .items ():
198- if key not in matches [line_number ]["Keywords Found" ]:
199- matches [line_number ]["Keywords Found" ][key ] = []
200- matches [line_number ]["Keywords Found" ][key ].extend (keywords )
203+ for fm_feature , keywords in keywords_found .items ():
204+ if fm_feature not in matches [line_number ]["Keywords Found" ]:
205+ matches [line_number ]["Keywords Found" ][fm_feature ] = []
206+ matches [line_number ]["Keywords Found" ][fm_feature ].extend (keywords )
201207 for keyword_regex in keywords :
202208 # Increment the counter with the correct category, subcategory, and keyword
203- keyword_counter [(key .split (" : " )[0 ], key .split (" : " )[1 ], keyword_regex )] += 1
209+ keyword_counter [(fm_feature .split (" : " )[0 ], fm_feature .split (" : " )[1 ], keyword_regex )] += 1
204210
205211 # Add begin and end comments only once for the line
206212 features , fm = determine_feature (pos_counter , matches , line_number , fm )
@@ -213,13 +219,13 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
213219 # Consolidate the "Keywords Found" for each line
214220 for match in matches .values ():
215221 consolidated_keywords = []
216- for key , keywords in match ["Keywords Found" ].items ():
222+ for fm_feature , keywords in match ["Keywords Found" ].items ():
217223 unique_keywords = set (keywords )
218- consolidated_keywords .append (f"{ key } : [{ ', ' .join (unique_keywords )} ]" )
224+ consolidated_keywords .append (f"{ fm_feature } : [{ ', ' .join (unique_keywords )} ]" )
219225 match ["Keywords Found" ] = ", " .join (consolidated_keywords )
220226
221227 # Save updated file with comments
222- if len (matches .values ()) > 0 :
228+ if len (matches .values ()) > 0 :
223229 write (file_path , lines )
224230
225231 return os .path .basename (file_path ), short_path , list (matches .values ())
@@ -230,50 +236,48 @@ def write(file_path, lines):
230236 file .writelines (lines )
231237
232238
233- def sanitize_for_hans (name ):
234- return (
235- name
236- .replace ('[' , '' )
237- .replace (']' , '' )
238- .replace ('(' , '' )
239- .replace (')' , '' )
240- .replace ('*' , '' )
241- .replace ('?' , '' )
242- .replace ('.' , '' )
243- )
244-
245-
# Regex metacharacters removed from a keyword pattern before it is used as
# part of a feature name.
_HANS_UNSAFE_CHARS = str.maketrans('', '', '[]()*?.')


def _find_sub_feature(parent, name):
    """Return the direct sub-feature of *parent* named *name*, or None."""
    for sub in parent.sub_features:
        if sub.name == name:
            return sub
    return None


def determine_feature(pos_counter, matches, line_number, fm):
    """Derive feature names for one matched line and register them in *fm*.

    For every ``"category : subcategory"`` entry under
    ``matches[line_number]["Keywords Found"]`` and every keyword regex listed
    for it, a feature name ``KeywordMatch|<last path element>|<keyword>`` is
    built (with the characters ``[ ] ( ) * ? .`` stripped from the keyword).
    The taxonomy path is walked from the root of *fm*, creating missing
    nodes, and the keyword feature is added below the final node unless a
    sub-feature with that name already exists.

    Args:
        pos_counter: Not used by this function; kept for caller
            compatibility.
        matches: Mapping of line number to a match record whose
            ``"Keywords Found"`` value is a dict of
            ``"category : subcategory"`` -> list of keyword regex strings.
        line_number: Key into *matches* selecting the record to process.
        fm: Root node of the feature model; nodes expose ``name`` and
            ``sub_features``.

    Returns:
        A ``(features, fm)`` tuple: the feature names in encounter order
        (duplicates are kept) and the same *fm* object that was passed in.
    """
    features = []
    for fm_feature, keywords in matches[line_number]["Keywords Found"].items():
        # Without keywords there is nothing to register, and the taxonomy
        # path must not be created as a side effect.
        if not keywords:
            continue

        path = fm_feature.split(' : ')

        # Search for the taxonomy feature in the feature model. The path is
        # the same for every keyword of this entry, so resolve it only once.
        # NOTE(review): assumes Feature(name, parent) attaches the new node
        # to parent.sub_features -- confirm against the Feature class.
        current = fm
        for name in path:
            sub = _find_sub_feature(current, name)
            current = sub if sub is not None else Feature(name, current)

        for keyword in keywords:
            value = keyword.translate(_HANS_UNSAFE_CHARS)
            feature_name = f'KeywordMatch|{path[-1]}|{value}'
            features.append(feature_name)

            # Add the feature for the keyword to the feature model if it
            # doesn't exist yet.
            if _find_sub_feature(current, feature_name) is None:
                Feature(feature_name, current)
    return features, fm
278282
279283
@@ -329,7 +333,7 @@ def print_top_keywords(keyword_counter, total_matches):
329333def main ():
330334 repo_url = input ("Enter the repository URL: " )
331335 keyword_file = "SecList.json"
332- taxonomy_file = "../Resources/taxonomy.feature_model"
336+ taxonomy_file = "../../ Resources/taxonomy.feature_model"
333337
334338 taxonomy = read_feature_model (taxonomy_file )
335339 if taxonomy is None :
0 commit comments