@@ -123,7 +123,6 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
123123 hans_end_pattern = re .compile (r"//\s*&end\[(.*?)\]" ) # Match // &end[FeatureName]
124124 hans_line_pattern = re .compile (r"&line\[[^\]]+\]" ) # Match inline annotations
125125
126- string_literal_pattern = re .compile (r'".*?"' ) # Match anything inside "..."
127126 single_line_comment_pattern = re .compile (r"//.*" ) # Match anything after //
128127 multi_line_comment_start_pattern = re .compile (r"/\*" ) # Match /* (start of multi-line comment)
129128 multi_line_comment_end_pattern = re .compile (r"\*/" ) # Match */ (end of multi-line comment)
@@ -178,13 +177,10 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
178177 if single_line_comment_pattern .search (stripped_line ):
179178 continue
180179
181- # Remove all string literals from the line before searching for keywords
182- cleaned_line = string_literal_pattern .sub ("" , stripped_line )
183-
184180 # Search only non-comment, non-test, non-HAnS-annotated lines
185181 keywords_found = {}
186182 for category , subcategory , keyword_regex in flattened_keywords :
187- if re .search (keyword_regex , cleaned_line , re .IGNORECASE ):
183+ if re .search (keyword_regex , stripped_line , re .IGNORECASE ):
188184 key = f"{ category } : { subcategory } "
189185 if key not in keywords_found :
190186 keywords_found [key ] = []
@@ -201,7 +197,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
201197 for key , keywords in keywords_found .items ():
202198 if key not in matches [line_number ]["Keywords Found" ]:
203199 matches [line_number ]["Keywords Found" ][key ] = []
204- matches [line_number ]["Keywords Found" ][key ].extend (keywords )
200+ matches [line_number ]["Keywords Found" ][key ].append (keywords )
205201 for keyword_regex in keywords :
206202 # Increment the counter with the correct category, subcategory, and keyword
207203 keyword_counter [(key .split (" : " )[0 ], key .split (" : " )[1 ], keyword_regex )] += 1
@@ -228,28 +224,32 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
228224
229225 return os .path .basename (file_path ), short_path , list (matches .values ())
230226
227+
def write(file_path, lines):
    """Write *lines* to *file_path*, replacing any existing content.

    Args:
        file_path: Path of the file to create or overwrite.
        lines: Iterable of strings, written back to back. No line
            separators are added, so each item must already carry its
            own trailing newline if one is wanted.
    """
    # Fixed UTF-8 encoding keeps the output independent of the platform's
    # default locale encoding.
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.writelines(lines)
234231
235232
236- def determine_feature (pos_counter , matches , line_number , fm ):
237- features = []
238- for match in list (matches [line_number ]["Keywords Found" ].items ()):
239- if len (features ) > 0 :
240- features += ', '
241- path = match [0 ].split (' : ' )
242- length = len (path )
243- value = (
244- match [1 ][0 ]
# Deletion table for the characters removed by sanitize_for_hans:
# '[', ']', '(', ')', '*', '?' and '.'.
_HANS_STRIP_TABLE = str.maketrans("", "", "[]()*?.")


def sanitize_for_hans(name):
    """Return *name* with the characters ``[ ] ( ) * ? .`` removed.

    All other characters are kept in their original order.
    NOTE(review): presumably this strips regex metacharacters so that a
    keyword pattern can be embedded in a HAnS feature name; confirm
    against the callers.

    Args:
        name: The string to clean.

    Returns:
        A copy of *name* without any of the seven characters listed above.
    """
    # A single translate pass replaces the chain of per-character
    # .replace() calls and yields the same result.
    return name.translate(_HANS_STRIP_TABLE)
252244
245+
246+ def determine_feature (pos_counter , matches , line_number , fm ):
247+ features = []
248+ for match in list (matches [line_number ]["Keywords Found" ].items ()):
249+ path = match [0 ].split (' : ' )
250+ length = len (path )
251+ value = sanitize_for_hans (match [1 ][0 ])
252+
253253 feature_name = f'KeywordMatch|{ path [length - 1 ]} |{ value } '
254254 features .append (feature_name )
255255
0 commit comments