@@ -25,7 +25,7 @@ def flatten_keywords(keyword_dict):
2525 return flattened
2626
2727
28- def process_feature_annotations (features_file , repo_dir , flattened_keywords , taxonomy , fm ):
28+ def add_api_feature_annotations (features_file , repo_dir , flattened_keywords , taxonomy , fm ):
2929 if os .path .exists (features_file ):
3030 with open (features_file , "r" ) as file :
3131 data = json .load (file )
@@ -34,50 +34,66 @@ def process_feature_annotations(features_file, repo_dir, flattened_keywords, tax
3434 sys .exit (1 )
3535
3636 library_features = set ()
37+ line_annotations = defaultdict (dict )
3738
3839 for source in data .get ('sources' , []):
39- for feature in source .get ('files' , []):
40- file_path = os .path .join (repo_dir , feature .get ('path' , '' ))
41- if not os .path .exists (file_path ) or not feature .get ('apiCalls' ):
40+ for file_reference in source .get ('files' , []):
41+ file_path = os .path .join (repo_dir , file_reference .get ('path' , '' ))
42+ if not os .path .exists (file_path ) or not file_reference .get ('apiCalls' ):
4243 continue
43-
44- with open (file_path , "r" , encoding = "utf-8" , errors = "ignore" ) as f :
45- lines = f .readlines ()
46-
4744 # Collect annotations per line
48- line_annotations = defaultdict (set )
4945
50- for api_call in feature ['apiCalls' ]:
46+ for api_call in file_reference ['apiCalls' ]:
5147 line_index = api_call .get ('line' , 0 )
5248 feature_names = api_call .get ('features' , [])
5349 method_name = api_call .get ('api' , '' ).split ('.' )[- 1 ]
5450
55- if feature_names and line_index < len (lines ):
56- for feature_name in feature_names :
57- tag = f"APIMatch|{ feature_name } |{ method_name } "
58- line_annotations [line_index ].add (tag )
59- library_features .add (tag )
60- if add_to_fm (fm , taxonomy , feature_name , tag ) is None :
61- print (f"Feature '{ feature_name } ' not found in taxonomy, skipped for now." )
62-
63- # Apply annotations to lines
64- for line_index , tags in line_annotations .items ():
65- annotation = ""
66- if len (tags ) == 1 :
67- tag = next (iter (tags ))
68- annotation = f"// &line[{ tag } ]"
69- else :
70- tags_str = ", " .join (sorted (tags ))
71- annotation = f"// &line[{ tags_str } ]"
72-
73- if annotation not in lines [line_index ]:
74- lines [line_index ] = lines [line_index ].rstrip () + f" { annotation } \n "
75-
76- with open (file_path , "w" , encoding = "utf-8" ) as f :
77- f .writelines (lines )
51+ for feature_name in feature_names :
52+ tag = f"APIMatch|{ feature_name } |{ method_name } "
53+
54+ line_dict = line_annotations .get (file_path )
55+ if line_dict is None :
56+ line_dict = defaultdict (set )
57+ line_annotations [file_path ] = line_dict
58+
59+ feature_list = line_dict .get (line_index )
60+ if feature_list is None :
61+ feature_list = set ()
62+ line_dict [line_index ] = feature_list
63+
64+ feature_list .add (tag )
65+ library_features .add (tag )
66+ if add_to_fm (fm , taxonomy , feature_name , tag ) is None :
67+ print (f"Feature '{ feature_name } ' not found in taxonomy, skipped for now." )
68+
69+ for file_path , values in line_annotations .items ():
70+ with open (file_path , "r" , encoding = "utf-8" , errors = "ignore" ) as file :
71+ lines = file .readlines ()
72+
73+ # Apply annotations to lines
74+ for line_index , tags in values .items ():
75+ if line_index >= len (lines ):
76+ print (f"Warning: Line index { line_index } exceeds the number of lines in { file_path } . Skipping adding annotation for: { tags } ." )
77+ continue
78+ new_line = createLineAnnotation (lines [line_index ].rstrip (), tags )
79+ lines [line_index ] = new_line
80+
81+ write (file_path , lines )
7882 return library_features
7983
8084
def createLineAnnotation(line, tags):
    """Append an inline HAnS feature annotation to a source line.

    Args:
        line: The source line to annotate (trailing whitespace is stripped).
        tags: A non-empty collection of feature tags to embed in the marker.

    Returns:
        The line with a trailing ``// &line[...]`` comment and a newline.
    """
    # sorted() gives a deterministic order; for a single tag the join is
    # simply that tag, so no special one-element branch is needed.
    tags_str = ", ".join(sorted(tags))
    return line.rstrip() + f" // &line[{tags_str}]\n"
95+
96+
8197def get_subtree (flattened_keywords , feature_name ):
8298 """Search in the flattened keywords for the given feature name and return the result."""
8399 for category , subcategory , keyword in flattened_keywords :
@@ -97,7 +113,6 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
97113 short_path = os .path .relpath (file_path , repo_dir )
98114 if "src\\ " in short_path :
99115 short_path = short_path [short_path .index ("src\\ " ):]
100- updated_lines = [] # Store updated lines with added comments
101116 hans_lines_seen = set () # Store unique Hans line patterns
102117
103118 in_multiline_comment = False
@@ -114,12 +129,13 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
114129 multi_line_comment_end_pattern = re .compile (r"\*/" ) # Match */ (end of multi-line comment)
115130
116131 with open (file_path , "r" , encoding = "utf-8" , errors = "ignore" ) as file :
117- for line_number , line in enumerate (file , start = 1 ):
132+ lines = file .readlines ()
133+ line_number = 0
134+ for line in lines :
118135 stripped_line = line .strip ()
119136
120137 # Skip import statements
121138 if "import" in stripped_line :
122- updated_lines .append (line )
123139 continue
124140
125141 # Skip lines that are individually annotated with HAnS
@@ -129,14 +145,12 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
129145 if match not in hans_lines_seen : # Count only if not seen before
130146 hans_exclusion_counter [0 ] += 1
131147 hans_lines_seen .add (match )
132- updated_lines .append (line )
133148 continue
134149
135150 # Handle multi-line comments
136151 if multi_line_comment_start_pattern .search (stripped_line ):
137152 in_multiline_comment = True
138153 if in_multiline_comment :
139- updated_lines .append (line )
140154 if multi_line_comment_end_pattern .search (stripped_line ):
141155 in_multiline_comment = False
142156 continue
@@ -153,17 +167,15 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
153167 in_hans_annotated_block = True
154168 if hans_end_pattern .search (stripped_line ):
155169 in_hans_annotated_block = False
156- updated_lines . append ( line ) # Preserve the closing annotation
170+ # Preserve the closing annotation
157171 continue
158172
159173 # Skip lines inside test contexts or inside a HAnS-annotated block
160174 if in_testing_context or in_hans_annotated_block :
161- updated_lines .append (line )
162175 continue
163176
164177 # Skip single-line comments
165178 if single_line_comment_pattern .search (stripped_line ):
166- updated_lines .append (line )
167179 continue
168180
169181 # Remove all string literals from the line before searching for keywords
@@ -197,15 +209,10 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
197209 # Add begin and end comments only once for the line
198210 features , fm = determine_feature (pos_counter , matches , line_number , fm )
199211
200- comment_start = "// &begin[" + features + "]\n "
201- updated_lines .append (comment_start )
202- updated_lines .append (line ) # Add the line with the match
203- comment_end = "// &end[" + features + "]\n "
204- updated_lines .append (comment_end )
212+ lines [line_number ] = createLineAnnotation (line , features )
205213 pos_list .append (f"Pos{ pos_counter [0 ]} " )
206214
207- else :
208- updated_lines .append (line )
215+ line_number += 1
209216
210217 # Consolidate the "Keywords Found" for each line
211218 for match in matches .values ():
@@ -216,14 +223,18 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
216223 match ["Keywords Found" ] = ", " .join (consolidated_keywords )
217224
218225 # Save updated file with comments
219- with open ( file_path , "w" , encoding = "utf-8" ) as file :
220- file . writelines ( updated_lines )
226+ if len ( matches . values ()) > 0 :
227+ write ( file_path , lines )
221228
222229 return os .path .basename (file_path ), short_path , list (matches .values ())
223230
def write(file_path, lines):
    """Overwrite *file_path* with the given sequence of lines (UTF-8)."""
    content = "".join(lines)
    with open(file_path, "w", encoding="utf-8") as out:
        out.write(content)
234+
224235
225236def determine_feature (pos_counter , matches , line_number , fm ):
226- features = ''
237+ features = []
227238 for match in list (matches [line_number ]["Keywords Found" ].items ()):
228239 if len (features ) > 0 :
229240 features += ', '
@@ -240,7 +251,7 @@ def determine_feature(pos_counter, matches, line_number, fm):
240251 )
241252
242253 feature_name = f'KeywordMatch|{ path [length - 1 ]} |{ value } '
243- features += feature_name
254+ features . append ( feature_name )
244255
245256 current = fm
246257 i = 0
@@ -341,7 +352,7 @@ def main():
341352 fm = Feature (taxonomy .name , None )
342353
343354 # Process library annotations first
344- library_features = process_feature_annotations (project_dir + "/result/features.json" , project_dir , flattened_keywords , taxonomy , fm )
355+ library_features = add_api_feature_annotations (project_dir + "/result/features.json" , project_dir , flattened_keywords , taxonomy , fm )
345356
346357 # Initialize the exclusion counter ONCE here
347358 hans_exclusion_counter = [0 ]
0 commit comments