Skip to content

Commit a760560

Browse files
committed
further details for required inputs
1 parent bdde519 commit a760560

1 file changed

Lines changed: 38 additions & 22 deletions

File tree

scripts/comment_generation_bug_reports.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import csv
2-
import datetime
32
import json
43
import os
54
import subprocess
6-
from datetime import timedelta
5+
from datetime import datetime, timedelta
76

87
import tiktoken
98
from dateutil import parser, tz
@@ -19,7 +18,13 @@
1918
csv.field_size_limit(10**8)
2019

2120
### VARIABLES
21+
22+
# For the input file, we consider the list of reported bugs available here: https://github.com/mozilla/regressors-regressions-dataset
23+
# Clone that repository locally and set INPUT_FILE to the path of its dataset.csv file.
2224
INPUT_FILE = ""
25+
26+
# Because we need additional information about the commits listed in INPUT_FILE, a local copy of the mozilla-central repository is required.
27+
# Clone https://hg-edge.mozilla.org/mozilla-central locally and set its path below.
2328
LOCAL_MERCURIAL_PATH = ""
2429
REPORT_DIRECTORY = ""
2530
REPORT_FILENAME_GPT = "generated_comment_gpt.csv"
@@ -201,6 +206,16 @@ def filter_comments_using_deepseek(gen_comments, formatted_patch_fix):
201206
return filtered_comments
202207

203208

209+
def filter_comments_using_gpt(formatted_patch_fix, gen_comments, llm):
210+
filtering = LLMChain(
211+
prompt=PromptTemplate.from_template(FILTERING_COMMENTS), llm=llm
212+
)
213+
filtered_comments_gpt = filtering.invoke(
214+
{"bug_summarization": formatted_patch_fix, "comments": gen_comments},
215+
)["text"]
216+
return filtered_comments_gpt
217+
218+
204219
def get_hunk_with_associated_lines(hunk):
205220
hunk_with_lines = ""
206221
for line in hunk:
@@ -252,7 +267,7 @@ def target_file_is_changed_by_bug_and_fix_commits(patch_bug, patch_fix):
252267
return False
253268

254269

255-
def generate_comments_bug_fix(
270+
def generate_code_review_comments(
256271
patch_bug,
257272
patch_fix,
258273
bug_commit_message,
@@ -327,22 +342,18 @@ def generate_comments_bug_fix(
327342
)
328343
)
329344

330-
filtering = LLMChain(
331-
prompt=PromptTemplate.from_template(FILTERING_COMMENTS), llm=llm
345+
filtered_comments_gpt = filter_comments_using_gpt(
346+
formatted_patch_fix, gen_comments, llm
332347
)
333348

334-
filtered_comments_gpt = filtering.invoke(
335-
{"bug_summarization": formatted_patch_fix, "comments": gen_comments},
336-
)["text"]
337-
338349
filtered_comments_deepseek = filter_comments_using_deepseek(
339350
gen_comments, formatted_patch_fix
340351
)
341352

342353
return [filtered_comments_gpt, filtered_comments_deepseek]
343354

344355

345-
def write_bug_info_to_csv(
356+
def save_output_comments(
346357
bug_id,
347358
bug_commit,
348359
bug_tokens,
@@ -576,7 +587,7 @@ def is_commit_done_within_the_last_target_years(commit_date, years):
576587
"text"
577588
]
578589

579-
output = generate_comments_bug_fix(
590+
generated_comments = generate_code_review_comments(
580591
bug_commit_diff,
581592
fix_commit_diff,
582593
bug_commit_message,
@@ -587,17 +598,22 @@ def is_commit_done_within_the_last_target_years(commit_date, years):
587598
fix_summary,
588599
)
589600

590-
if output is not None and len(output) > 1:
591-
comments = output[0]
592-
filtered_deepseek = output[1]
601+
if (
602+
generated_comments is not None
603+
and len(generated_comments) > 1
604+
):
605+
gpt_filtered_comments = generated_comments[0]
606+
deepseek_filtered_comments = generated_comments[1]
593607

594608
if (
595-
comments is not None
596-
and filtered_deepseek is not None
609+
gpt_filtered_comments is not None
610+
and deepseek_filtered_comments is not None
597611
):
598-
if comments is not None:
599-
valid_json = extract_and_parse_json(comments)
600-
write_bug_info_to_csv(
612+
if gpt_filtered_comments is not None:
613+
valid_json = extract_and_parse_json(
614+
gpt_filtered_comments
615+
)
616+
save_output_comments(
601617
bug_id,
602618
bug_commit_hash,
603619
bug_count_tokens,
@@ -609,11 +625,11 @@ def is_commit_done_within_the_last_target_years(commit_date, years):
609625
interval_bug_fix,
610626
REPORT_FILENAME_GPT,
611627
)
612-
if filtered_deepseek is not None:
628+
if deepseek_filtered_comments is not None:
613629
valid_json = extract_and_parse_json(
614-
filtered_deepseek
630+
deepseek_filtered_comments
615631
)
616-
write_bug_info_to_csv(
632+
save_output_comments(
617633
bug_id,
618634
bug_commit_hash,
619635
bug_count_tokens,

0 commit comments

Comments
 (0)