Skip to content

Commit 9fbdf40

Browse files
committed
Fixed empty features in annotation, replace . in HAnS annotations, do not ignore string literals, minor logic changes
1 parent 0ca5f1a commit 9fbdf40

1 file changed

Lines changed: 15 additions & 15 deletions

File tree

  • SecurityKeywordsBasedSearchTool/SecFeatFinder

SecurityKeywordsBasedSearchTool/SecFeatFinder/main.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
123123
hans_end_pattern = re.compile(r"//\s*&end\[(.*?)\]") # Match // &end[FeatureName]
124124
hans_line_pattern = re.compile(r"&line\[[^\]]+\]") # Match inline annotations
125125

126-
string_literal_pattern = re.compile(r'".*?"') # Match anything inside "..."
127126
single_line_comment_pattern = re.compile(r"//.*") # Match anything after //
128127
multi_line_comment_start_pattern = re.compile(r"/\*") # Match /* (start of multi-line comment)
129128
multi_line_comment_end_pattern = re.compile(r"\*/") # Match */ (end of multi-line comment)
@@ -178,13 +177,10 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
178177
if single_line_comment_pattern.search(stripped_line):
179178
continue
180179

181-
# Remove all string literals from the line before searching for keywords
182-
cleaned_line = string_literal_pattern.sub("", stripped_line)
183-
184180
# Search only non-comment, non-test, non-HAnS-annotated lines
185181
keywords_found = {}
186182
for category, subcategory, keyword_regex in flattened_keywords:
187-
if re.search(keyword_regex, cleaned_line, re.IGNORECASE):
183+
if re.search(keyword_regex, stripped_line, re.IGNORECASE):
188184
key = f"{category} : {subcategory}"
189185
if key not in keywords_found:
190186
keywords_found[key] = []
@@ -201,7 +197,7 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
201197
for key, keywords in keywords_found.items():
202198
if key not in matches[line_number]["Keywords Found"]:
203199
matches[line_number]["Keywords Found"][key] = []
204-
matches[line_number]["Keywords Found"][key].extend(keywords)
200+
matches[line_number]["Keywords Found"][key].append(keywords)
205201
for keyword_regex in keywords:
206202
# Increment the counter with the correct category, subcategory, and keyword
207203
keyword_counter[(key.split(" : ")[0], key.split(" : ")[1], keyword_regex)] += 1
@@ -228,28 +224,32 @@ def search_keywords_in_file(file_path, flattened_keywords, repo_dir,
228224

229225
return os.path.basename(file_path), short_path, list(matches.values())
230226

227+
231228
def write(file_path, lines):
232229
with open(file_path, "w", encoding="utf-8") as file:
233230
file.writelines(lines)
234231

235232

236-
def determine_feature(pos_counter, matches, line_number, fm):
237-
features = []
238-
for match in list(matches[line_number]["Keywords Found"].items()):
239-
if len(features) > 0:
240-
features += ', '
241-
path = match[0].split(' : ')
242-
length = len(path)
243-
value = (
244-
match[1][0]
233+
def sanitize_for_hans(name):
234+
return (
235+
name
245236
.replace('[', '')
246237
.replace(']', '')
247238
.replace('(', '')
248239
.replace(')', '')
249240
.replace('*', '')
250241
.replace('?', '')
242+
.replace('.', '')
251243
)
252244

245+
246+
def determine_feature(pos_counter, matches, line_number, fm):
247+
features = []
248+
for match in list(matches[line_number]["Keywords Found"].items()):
249+
path = match[0].split(' : ')
250+
length = len(path)
251+
value = sanitize_for_hans(match[1][0])
252+
253253
feature_name = f'KeywordMatch|{path[length - 1]}|{value}'
254254
features.append(feature_name)
255255

0 commit comments

Comments
 (0)