Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
218ddfc
Initial working release notes generator
benjaminmah Jan 23, 2025
51e7f34
Fixed prompt and list generation
benjaminmah Jan 24, 2025
6af501b
Fixed prompt and excluded Nightly
benjaminmah Jan 27, 2025
98ea3a9
Added duplicate remover
benjaminmah Jan 28, 2025
a8e9880
Added additional filtering
benjaminmah Jan 28, 2025
afd0d2c
Fixed prompt to clean
benjaminmah Jan 31, 2025
ce3bdce
Added extra conversation
benjaminmah Feb 1, 2025
bc1dd9c
New prompt
benjaminmah Feb 5, 2025
6ba5b88
Changed prompt
benjaminmah Feb 5, 2025
be742b1
Made prompt more strict
benjaminmah Feb 7, 2025
1dacebf
Fixed prompt and increased chunk size
benjaminmah Feb 10, 2025
4f0e081
Removed asterisks
benjaminmah Feb 10, 2025
821790c
Changed version
benjaminmah Feb 11, 2025
34e366a
Added bug filtering for webextensions
benjaminmah Feb 13, 2025
27204d1
Edited prompt
benjaminmah Feb 19, 2025
2033b89
Separated release notes into a runner and tool, updated the method to…
benjaminmah Feb 21, 2025
7638d2a
Fixed up runner to take in only one version
benjaminmah Feb 24, 2025
d9f6831
Moved version to the function
benjaminmah Feb 25, 2025
93a5982
Fixed release notes script to make use of URL instead of local repo
benjaminmah Mar 3, 2025
2b702fe
Removed old script
benjaminmah Mar 3, 2025
35ad073
Removed HTML parsing with json
benjaminmah Mar 4, 2025
2d0030c
Removed .get and response 200
benjaminmah Mar 5, 2025
fbb3c30
Made input and output list instead of string
benjaminmah Mar 5, 2025
3d6f4d0
Using LangChain
benjaminmah Mar 5, 2025
c220bb3
Using data.values()
benjaminmah Mar 5, 2025
1bf92b2
Added LLMChain
benjaminmah Mar 5, 2025
06af4d8
Cleaned up code
benjaminmah Mar 6, 2025
a14ad87
Added typings
benjaminmah Mar 6, 2025
20d0b6e
Removed OpenAI
benjaminmah Mar 6, 2025
b48089f
Changed type hints from List to list
benjaminmah Mar 6, 2025
5bd61ad
Removed regex search for bug id
benjaminmah Mar 6, 2025
2dddedb
Replaced token chunking with commit chunking
benjaminmah Mar 6, 2025
3572bd8
Changed chunk param to commit chunk
benjaminmah Mar 6, 2025
020fed3
Renamed functions
benjaminmah Mar 6, 2025
1c5cbe2
Fixed variable names
benjaminmah Mar 6, 2025
0d173ad
Changed to generator
benjaminmah Mar 7, 2025
030d705
Removed shortlist_with_gpt function
benjaminmah Mar 14, 2025
6326f66
Simplified filtering irrelevant commits
benjaminmah Mar 14, 2025
e25aad5
Removed refining shortlist function
benjaminmah Mar 14, 2025
6418551
Added author filtering
benjaminmah Mar 14, 2025
4140c52
Added generative_model_tool
benjaminmah Mar 14, 2025
b10f809
Fixed up code
benjaminmah Mar 14, 2025
c6eafb8
Generalized previous version function
benjaminmah Mar 14, 2025
1191215
Removed explicit llm arg
benjaminmah Mar 21, 2025
51d6d9f
Replaced regex with inequality
benjaminmah Mar 21, 2025
69af386
Added ignore commit list and specific component/product ignore list
benjaminmah Mar 21, 2025
88cf631
Addressed PR comments
benjaminmah Mar 21, 2025
2c0a3ce
Converted list to set
benjaminmah Mar 24, 2025
66dd826
Added test for previous version
benjaminmah Mar 24, 2025
f177f16
Fixed test to not require downloading DB
benjaminmah Mar 26, 2025
1946dca
Initial cloud function
benjaminmah Apr 7, 2025
75848d9
Moved cloud function file to functions folder
benjaminmah Apr 7, 2025
284c6f2
Added requirements
benjaminmah Apr 7, 2025
89bac35
Fixed args
benjaminmah Apr 7, 2025
ff62313
Fixed args
benjaminmah Apr 7, 2025
4a042fc
Added workflow to deploy
benjaminmah Apr 8, 2025
fbb46ad
Moved workflow file and fixed to trigger every tag rather than every …
benjaminmah Apr 10, 2025
2c5d73c
Addressed PR comments
benjaminmah Apr 10, 2025
bf239d0
Addressed PR comments
benjaminmah Apr 10, 2025
c79890e
Addressed PR comments
benjaminmah Apr 10, 2025
b72d217
Added explicit deduplication
benjaminmah Apr 10, 2025
3e9c7f7
Hard coded llm name and chunk size
benjaminmah Apr 14, 2025
0da3e8a
Changed output to be a list and JSON
benjaminmah Apr 15, 2025
1d6ecc6
Addressed PR comments
benjaminmah Apr 16, 2025
9b07b5b
Simplified LLM creation
benjaminmah Apr 16, 2025
e852e9d
Replaced DB with Bugzilla calls
benjaminmah Apr 17, 2025
11a6444
Addressed PR comments
benjaminmah Apr 23, 2025
aebee0a
Addressed PR comments
benjaminmah Apr 27, 2025
38499c3
Changed input to have channel and release separately
benjaminmah Apr 29, 2025
2187aab
Removed test and function
benjaminmah Apr 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 236 additions & 0 deletions bugbug/tools/release_notes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
import logging
import os
import re

import requests
import tiktoken
from bs4 import BeautifulSoup
from openai import OpenAI

# Chat model name; used to pick the tiktoken encoding and for every
# chat-completion request made by this module.
MODEL = "gpt-4o"

# Module-wide OpenAI client. The API key is read from the OPENAI_API_KEY
# environment variable when the module is imported.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ReleaseNotesGenerator:
    """Shortlist user-facing Firefox changes between two consecutive beta tags.

    The pipeline implemented by :meth:`generate_worthy_commits` is:

    1. fetch the pushlog between ``FIREFOX_BETA_<N-1>_BASE`` and
       ``FIREFOX_BETA_<N>_BASE``;
    2. drop commit lines matching a set of filters (:meth:`clean_commits`);
    3. ask the chat model to summarise the rest, one chunk at a time;
    4. ask the model once more to prune the combined list;
    5. write the result to ``version_summary_<version>.txt``.

    Args:
        chunk_size: Maximum number of tokens packed into one chunk before a
            new chunk is started.
    """

    # Sentinel strings returned by the LLM helpers when the API call fails.
    # They are kept as return values for backward compatibility, and are
    # recognised by generate_worthy_commits so they never reach the output.
    _SUMMARY_ERROR = "Error: Unable to generate summary."
    _FILTER_ERROR = "Error: Unable to remove unworthy commits."

    # Seconds to wait for the pushlog server before giving up.
    _HTTP_TIMEOUT = 60

    def __init__(self, chunk_size=10000):
        self.chunk_size = chunk_size

    def get_previous_version(self, current_version):
        """Return the beta base tag that precedes ``current_version``.

        Args:
            current_version: A tag of the exact form ``FIREFOX_BETA_<N>_BASE``.

        Returns:
            The same tag with ``<N>`` decremented by one.

        Raises:
            ValueError: If ``current_version`` does not have that exact form.
        """
        # fullmatch (not match) so trailing garbage after "_BASE" is rejected
        # instead of being silently dropped from the returned tag.
        match = re.fullmatch(r"(FIREFOX_BETA_)(\d+)(_BASE)", current_version)
        if not match:
            raise ValueError(f"Invalid version format: {current_version!r}")
        prefix, version_number, suffix = match.groups()
        return f"{prefix}{int(version_number) - 1}{suffix}"

    def get_token_count(self, text):
        """Return the number of tokens ``text`` encodes to for ``MODEL``."""
        encoding = tiktoken.encoding_for_model(MODEL)
        return len(encoding.encode(text))

    def split_into_chunks(self, commit_log):
        """Group blank-line-separated commit blocks into token-bounded chunks.

        Blocks are never split: a single block larger than ``chunk_size``
        becomes a chunk of its own. Blank blocks are skipped, so an empty
        log yields an empty list.

        Args:
            commit_log: Commit blocks separated by ``"\\n\\n"``.

        Returns:
            A list of chunk strings, each made of blocks joined by ``"\\n\\n"``.
        """
        chunks = []
        current_chunk = []
        current_token_count = 0

        for block in commit_log.split("\n\n"):
            if not block.strip():
                continue

            block_token_count = self.get_token_count(block)

            # Flush only when there is something to flush; otherwise an
            # oversized first block would produce an empty leading chunk.
            if (
                current_chunk
                and current_token_count + block_token_count > self.chunk_size
            ):
                chunks.append("\n\n".join(current_chunk))
                current_chunk = []
                current_token_count = 0

            current_chunk.append(block)
            current_token_count += block_token_count

        if current_chunk:
            chunks.append("\n\n".join(current_chunk))

        return chunks

    def _chat(self, prompt):
        """Send ``prompt`` as a single user message and return the stripped reply.

        Any exception raised by the client is propagated to the caller.
        """
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=MODEL,
            temperature=0.1,
        )
        return response.choices[0].message.content.strip()

    def summarize_with_gpt(self, input_text):
        """Ask the model to pick the user-facing changes out of one chunk.

        Args:
            input_text: A chunk of cleaned commit descriptions.

        Returns:
            The model's reply (requested as raw CSV lines), or the sentinel
            ``"Error: Unable to generate summary."`` if the API call fails.
        """
        prompt = f"""
You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps:

1. Must Include Only Meaningful Changes:
- Only keep commits that significantly impact users and are strictly user-facing, such as:
- New features
- UI changes
- Major performance improvements
- Security patches (if user-facing)
- Web platform changes that affect how websites behave
- DO NOT include:
- Small bug fixes unless critical
- Internal code refactoring
- Test changes or documentation updates
- Developer tooling or CI/CD pipeline changes
Again, only include changes that are STRICTLY USER-FACING.

2. Output Format:
- Use simple, non-technical language suitable for release notes.
- Use the following strict format for each relevant commit, in CSV FORMAT:
[Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users
- Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc.

3. Bad Example (DO NOT FOLLOW):
[Feature],Enable async FlushRendering during resizing window if Windows DirectComposition is used,Bug 1922721,Improves performance and responsiveness when resizing windows on systems using Windows DirectComposition.
We should exclude this change because it contains technical jargon that is unclear to general users, making it difficult to understand. Additionally, the impact is limited to a specific subset of Windows users with DirectComposition enabled, and the improvement is not significant enough to be noteworthy in the release notes.

4. Be Aggressive in Filtering:
- If you're unsure whether a commit impacts end users, EXCLUDE it.
- Do not list developer-focused changes.

5. Select Only the Top 10 Commits:
- If there are more than 10 relevant commits, choose the most impactful ones.

6. Input:
Here is the chunk of commit logs you need to focus on:
{input_text}

7. Output Requirements:
- Output must be raw CSV text—no formatting, no extra text.
- Do not wrap the output in triple backticks (` ``` `) or use markdown formatting.
- Do not include the words "CSV" or any headers—just the data.
"""
        try:
            return self._chat(prompt)
        except Exception as e:
            # Boundary with a remote service: log and report via the sentinel
            # so that one failing chunk does not abort the whole run.
            logger.error(f"Error calling OpenAI API: {e}")
            return self._SUMMARY_ERROR

    def generate_summaries(self, commit_log):
        """Summarise ``commit_log`` chunk by chunk.

        Returns:
            One entry per chunk, in order. An entry is the error sentinel of
            :meth:`summarize_with_gpt` when that chunk's API call failed.
        """
        return [
            self.summarize_with_gpt(chunk)
            for chunk in self.split_into_chunks(commit_log)
        ]

    def clean_commits(self, commit_log, keywords):
        """Filter a newline-separated commit log down to relevant bug lines.

        A line is kept only if it mentions ``Bug <number>`` and matches none
        of: any whole-word ``keywords`` entry, the release treescript e-mail
        address, or the word "nightly". All matching is case-insensitive.
        Kept lines are trimmed to start at the bug reference.

        Args:
            commit_log: One commit description per line.
            keywords: Literal phrases that disqualify a line.

        Returns:
            The kept lines joined by blank lines (``"\\n\\n"``).
        """
        # Keywords are literal phrases, so escape them; compile every pattern
        # once instead of once per line.
        keyword_patterns = [
            re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
            for keyword in keywords
        ]
        treescript_pattern = re.compile(
            r"release\+treescript@mozilla\.org", re.IGNORECASE
        )
        nightly_pattern = re.compile(r"nightly", re.IGNORECASE)
        bug_pattern = re.compile(r"Bug \d+.*", re.IGNORECASE)

        cleaned_commits = []
        for line in commit_log.split("\n"):
            if any(pattern.search(line) for pattern in keyword_patterns):
                continue
            if treescript_pattern.search(line) or nightly_pattern.search(line):
                continue

            bug_match = bug_pattern.search(line)
            if bug_match is None:
                continue

            # Keep only the text from the bug reference to the end of the line.
            cleaned_commits.append(bug_match.group(0))

        return "\n\n".join(cleaned_commits)

    def remove_unworthy_commits(self, input_text):
        """Ask the model to prune a combined list of candidate release notes.

        Args:
            input_text: The candidate entries, one per line.

        Returns:
            The model's reply, or the sentinel
            ``"Error: Unable to remove unworthy commits."`` if the API call
            fails.
        """
        prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as:
- New features that users will notice and interact with.
- Significant fixes that resolve major user-facing issues.
- Performance improvements that make a clear difference in speed or responsiveness.
- Accessibility enhancements that improve usability for a broad set of users.
- Critical security updates that protect users from vulnerabilities.

Strict Filtering Criteria - REMOVE the following:
- Overly technical web platform changes (e.g., spec compliance tweaks, behind-the-scenes API adjustments).
- Developer-facing features that have no direct user impact.
- Minor UI refinements (e.g., button width adjustments, small animation tweaks).
- Bug fixes that don’t impact most users.
- Obscure web compatibility changes that apply only to edge-case websites.
- Duplicate entries or similar changes that were already listed.

Here is the list to filter:
{input_text}

Instructions:
- KEEP THE SAME FORMAT (do not change the structure of entries that remain).
- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete).
- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST.
- The output must be only the cleaned-up list, formatted exactly the same way.
"""
        try:
            return self._chat(prompt)
        except Exception as e:
            logger.error(f"Error while calling OpenAI API: {e}")
            return self._FILTER_ERROR

    def generate_worthy_commits(self, version):
        """Run the whole pipeline for ``version`` and write the result to disk.

        Sets ``self.version2`` (the given tag), ``self.version1`` (the
        previous tag) and ``self.output_file``. On any recoverable failure
        (non-200 response, no commits, every LLM call failing) an error is
        logged and the method returns without writing a file.

        Args:
            version: Target tag of the form ``FIREFOX_BETA_<N>_BASE``.

        Raises:
            ValueError: If ``version`` has an invalid format.
            requests.RequestException: If the pushlog request itself fails
                (connection error, timeout).
        """
        self.version2 = version
        self.version1 = self.get_previous_version(version)
        self.output_file = f"version_summary_{self.version2}.txt"

        logger.info(f"Generating list of commits for version: {self.version2}")
        url = (
            "https://hg.mozilla.org/releases/mozilla-release/pushloghtml"
            f"?fromchange={self.version1}&tochange={self.version2}"
        )
        # Without a timeout a stalled server would block the run forever.
        response = requests.get(url, timeout=self._HTTP_TIMEOUT)

        if response.status_code != 200:
            logger.error(
                f"Failed to retrieve the webpage. Status code: {response.status_code}"
            )
            return

        soup = BeautifulSoup(response.text, "html.parser")
        descriptions = []
        for entry in soup.find_all("tr", class_="pushlogentry"):
            cells = entry.find_all("td")
            if len(cells) < 3:
                # Malformed row: the commit description is read from the
                # third cell, so there is nothing to take from this one.
                continue
            descriptions.append(cells[2].get_text(separator=" ", strip=True))
        changes_output = "\n".join(descriptions)

        if not changes_output:
            logger.error("No changes found.")
            return

        logger.info("Cleaning commit log...")
        keywords_to_remove = [
            "Backed out",
            "a=testonly",
            "DONTBUILD",
            "add tests",
            "disable test",
        ]
        cleaned_commits = self.clean_commits(changes_output, keywords_to_remove)

        logger.info("Generating summaries for cleaned commits...")
        # Drop empty replies and failed chunks so the error sentinel is never
        # mistaken for a release note.
        summaries = [
            summary
            for summary in self.generate_summaries(cleaned_commits)
            if summary and summary != self._SUMMARY_ERROR
        ]
        if not summaries:
            logger.error("No summaries were generated.")
            return
        combined_list = "\n".join(summaries)

        logger.info("Removing unworthy commits from the list...")
        combined_list = self.remove_unworthy_commits(combined_list)
        if combined_list == self._FILTER_ERROR:
            logger.error("Filtering failed; no output file was written.")
            return

        # Explicit encoding: model output may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open(self.output_file, "w", encoding="utf-8") as file:
            file.write(combined_list)

        logger.info(f"Worthy commits saved to {self.output_file}")
26 changes: 26 additions & 0 deletions scripts/release_notes_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import argparse
import logging

from bugbug.tools.release_notes import ReleaseNotesGenerator

# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Parse the command line and run the release notes generator.

    Options:
        --version     (required) target version identifier.
        --chunk-size  (int, default 10000) chunk size for token processing.
    """
    cli = argparse.ArgumentParser(description="Generate Firefox release notes.")
    cli.add_argument("--version", required=True, help="Target version identifier")
    cli.add_argument(
        "--chunk-size",
        type=int,
        default=10000,
        help="Chunk size for token processing",
    )
    options = cli.parse_args()

    notes_generator = ReleaseNotesGenerator(chunk_size=options.chunk_size)
    notes_generator.generate_worthy_commits(version=options.version)


if __name__ == "__main__":
    main()