Skip to content

Commit 8bfa7ca

Browse files
committed
Fixed bugs
1 parent c9f51c8 commit 8bfa7ca

1 file changed

Lines changed: 55 additions & 51 deletions

File tree

  • SecurityKeywordsBasedSearchTool/SecFeatFinder

SecurityKeywordsBasedSearchTool/SecFeatFinder/main.py

Lines changed: 55 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
135135

136136
# Skip import statements
137137
if "import" in stripped_line:
138+
line_number += 1
138139
continue
139140

140141
# Skip lines that are individually annotated with HAnS
@@ -144,6 +145,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
144145
if match not in hans_lines_seen: # Count only if not seen before
145146
hans_exclusion_counter[0] += 1
146147
hans_lines_seen.add(match)
148+
line_number += 1
147149
continue
148150

149151
# Handle multi-line comments
@@ -152,6 +154,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
152154
if in_multiline_comment:
153155
if multi_line_comment_end_pattern.search(stripped_line):
154156
in_multiline_comment = False
157+
line_number += 1
155158
continue
156159

157160
# Detect if inside a test class or function
@@ -167,24 +170,27 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
167170
if hans_end_pattern.search(stripped_line):
168171
in_hans_annotated_block = False
169172
# Preserve the closing annotation
173+
line_number += 1
170174
continue
171175

172176
# Skip lines inside test contexts or inside a HAnS-annotated block
173177
if in_testing_context or in_hans_annotated_block:
178+
line_number += 1
174179
continue
175180

176181
# Skip single-line comments
177182
if single_line_comment_pattern.search(stripped_line):
183+
line_number += 1
178184
continue
179185

180186
# Search only non-comment, non-test, non-HAnS-annotated lines
181187
keywords_found = {}
182188
for category, subcategory, keyword_regex in flattened_keywords:
183189
if re.search(keyword_regex, stripped_line, re.IGNORECASE):
184-
key = f"{category} : {subcategory}"
185-
if key not in keywords_found:
186-
keywords_found[key] = []
187-
keywords_found[key].append(keyword_regex)
190+
fm_feature = f"{category} : {subcategory}"
191+
if fm_feature not in keywords_found:
192+
keywords_found[fm_feature] = []
193+
keywords_found[fm_feature].append(keyword_regex)
188194

189195
if keywords_found:
190196
if line_number not in matches:
@@ -194,13 +200,13 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
194200
"Keywords Found": {}
195201
}
196202
# Merge all found keywords for the same category and subcategory
197-
for key, keywords in keywords_found.items():
198-
if key not in matches[line_number]["Keywords Found"]:
199-
matches[line_number]["Keywords Found"][key] = []
200-
matches[line_number]["Keywords Found"][key].extend(keywords)
203+
for fm_feature, keywords in keywords_found.items():
204+
if fm_feature not in matches[line_number]["Keywords Found"]:
205+
matches[line_number]["Keywords Found"][fm_feature] = []
206+
matches[line_number]["Keywords Found"][fm_feature].extend(keywords)
201207
for keyword_regex in keywords:
202208
# Increment the counter with the correct category, subcategory, and keyword
203-
keyword_counter[(key.split(" : ")[0], key.split(" : ")[1], keyword_regex)] += 1
209+
keyword_counter[(fm_feature.split(" : ")[0], fm_feature.split(" : ")[1], keyword_regex)] += 1
204210

205211
# Add begin and end comments only once for the line
206212
features, fm = determine_feature(pos_counter, matches, line_number, fm)
@@ -213,13 +219,13 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
213219
# Consolidate the "Keywords Found" for each line
214220
for match in matches.values():
215221
consolidated_keywords = []
216-
for key, keywords in match["Keywords Found"].items():
222+
for fm_feature, keywords in match["Keywords Found"].items():
217223
unique_keywords = set(keywords)
218-
consolidated_keywords.append(f"{key} : [{', '.join(unique_keywords)}]")
224+
consolidated_keywords.append(f"{fm_feature} : [{', '.join(unique_keywords)}]")
219225
match["Keywords Found"] = ", ".join(consolidated_keywords)
220226

221227
# Save updated file with comments
222-
if len(matches.values()) > 0:
228+
if len(matches.values()) > 0:
223229
write(file_path, lines)
224230

225231
return os.path.basename(file_path), short_path, list(matches.values())
@@ -230,50 +236,48 @@ def write(file_path, lines):
230236
file.writelines(lines)
231237

232238

233-
def sanitize_for_hans(name):
234-
return (
235-
name
236-
.replace('[', '')
237-
.replace(']', '')
238-
.replace('(', '')
239-
.replace(')', '')
240-
.replace('*', '')
241-
.replace('?', '')
242-
.replace('.', '')
243-
)
244-
245-
246239
def determine_feature(pos_counter, matches, line_number, fm):
247240
features = []
248241
for match in list(matches[line_number]["Keywords Found"].items()):
249242
path = match[0].split(' : ')
250243
length = len(path)
251-
value = sanitize_for_hans(match[1][0])
252-
253-
feature_name = f'KeywordMatch|{path[length - 1]}|{value}'
254-
features.append(feature_name)
255-
256-
current = fm
257-
i = 0
258-
while i < length:
259-
name = path[i]
260-
found = False
261-
for sub in current.sub_features:
262-
if name == sub.name:
263-
current = sub
264-
found = True
244+
for keyword in match[1]:
245+
value = (
246+
keyword
247+
.replace('[', '')
248+
.replace(']', '')
249+
.replace('(', '')
250+
.replace(')', '')
251+
.replace('*', '')
252+
.replace('?', '')
253+
.replace('.', '')
254+
)
255+
feature_name = f'KeywordMatch|{path[length - 1]}|{value}'
256+
features.append(feature_name)
257+
258+
current = fm
259+
# Search for the taxonomy feature in the feature model
260+
i = 0
261+
while i < length:
262+
name = path[i]
263+
found = False
264+
for sub in current.sub_features:
265+
if name == sub.name:
266+
current = sub
267+
found = True
268+
break
269+
if not found:
270+
current = Feature(name, current)
271+
i += 1
272+
273+
# Add the feature for the keyword to the feature model if it doesn't exist
274+
exists = False
275+
for f in current.sub_features:
276+
if f.name == feature_name:
277+
exists = True
265278
break
266-
if not found:
267-
current = Feature(name, current)
268-
i += 1
269-
270-
exists = False
271-
for f in current.sub_features:
272-
if f.name == feature_name:
273-
exists = True
274-
break
275-
if not exists:
276-
Feature(feature_name, current)
279+
if not exists:
280+
Feature(feature_name, current)
277281
return features, fm
278282

279283

@@ -329,7 +333,7 @@ def print_top_keywords(keyword_counter, total_matches):
329333
def main():
330334
repo_url = input("Enter the repository URL: ")
331335
keyword_file = "SecList.json"
332-
taxonomy_file = "../Resources/taxonomy.feature_model"
336+
taxonomy_file = "../../Resources/taxonomy.feature_model"
333337

334338
taxonomy = read_feature_model(taxonomy_file)
335339
if taxonomy is None:

0 commit comments

Comments
 (0)