Skip to content

Commit e769851

Browse files
feat: enhance PR description generation for large diffs (#117)
- Introduced a chunking strategy to handle large diffs by splitting them into manageable parts.
- Added functionality to estimate token counts and create summary prompts for extremely large diffs.
- Improved error handling for API responses and ensured robust processing of diff chunks.
- Updated the main logic to generate concise pull request descriptions based on the size of the diff.
1 parent c7dcd6d commit e769851

1 file changed

Lines changed: 218 additions & 57 deletions

File tree

.github/actions/auto-pr-description/generate_pr_description.js

Lines changed: 218 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,189 @@
33
const fs = require('fs');
44
const path = require('path');
55

6+
// Configuration constants

/** Maximum estimated tokens sent in one request (conservative limit for Gemini 2.5 Flash). */
const MAX_TOKENS_PER_REQUEST = 100000;

/** Rough characters-per-token ratio used to convert between character and token counts. */
const CHARS_PER_TOKEN = 4;

/** Character budget for a single diff chunk, derived from the token limit above. */
const MAX_CHARS_PER_CHUNK = MAX_TOKENS_PER_REQUEST * CHARS_PER_TOKEN;

/** Upper bound on the number of chunks processed, to prevent excessive API calls. */
const MAX_CHUNKS = 10;
11+
12+
/**
 * Estimate the token count for a piece of text (rough approximation).
 *
 * The estimate is the text length in UTF-16 code units divided by
 * CHARS_PER_TOKEN, rounded up.
 *
 * @param {string} text - Text to measure; empty, null or undefined counts as 0 tokens.
 * @returns {number} Estimated number of tokens.
 */
function estimateTokens(text) {
  if (!text) {
    return 0;
  }
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
18+
19+
/**
 * Split a git diff into chunks, cutting at file boundaries where possible.
 *
 * Lines are accumulated into the current chunk. A new chunk is started:
 *   - at a `diff --git ` file header, once the current chunk has grown past
 *     half of the size limit (keeps whole files together when they fit), or
 *   - in the middle of a file, when adding the next line would push the chunk
 *     past the size limit.
 * No input line is ever dropped. A single line longer than the limit is kept
 * whole and therefore yields an oversized chunk.
 *
 * Only `diff --git ` lines are treated as file boundaries; `---` / `+++` lines
 * are ordinary content, because removed/added lines whose own text starts with
 * `--` / `++` look identical to the per-file headers.
 *
 * @param {string} diffContent - Full diff text.
 * @param {number} [maxCharsPerChunk=MAX_CHARS_PER_CHUNK] - Size limit of one chunk, in characters.
 * @returns {Array<{content: string, file: string, type: string}>} Chunks in diff order.
 *   `file` is the path of the file in the chunk, or `"<first> (+N more)"` when the
 *   chunk spans several files, or `''` when no `diff --git ` header was seen.
 */
function chunkDiffByFiles(diffContent, maxCharsPerChunk = MAX_CHARS_PER_CHUNK) {
  const fileChunks = [];
  if (!diffContent) {
    return fileChunks;
  }

  const fileHeaderPrefix = 'diff --git ';
  const softLimit = maxCharsPerChunk / 2;

  let pendingLines = [];
  let pendingFiles = [];
  let pendingSize = 0;
  let activeFile = '';

  // Emit the accumulated lines as one chunk (if non-blank) and reset the accumulator.
  const flush = () => {
    const content = pendingLines.join('\n').trimEnd();
    if (content.trim()) {
      const [firstFile = '', ...otherFiles] = pendingFiles;
      fileChunks.push({
        content,
        file: otherFiles.length > 0 ? `${firstFile} (+${otherFiles.length} more)` : firstFile,
        type: 'file-chunk',
      });
    }
    pendingLines = [];
    pendingFiles = [];
    pendingSize = 0;
  };

  for (const line of diffContent.split('\n')) {
    const lineSize = line.length + 1; // +1 for the newline removed by split()

    if (line.startsWith(fileHeaderPrefix)) {
      // Preferred cut point: a file boundary once the chunk is getting large.
      if (pendingSize > softLimit) {
        flush();
      }
      const headerMatch = /^diff --git a\/.+ b\/(.+)$/.exec(line);
      activeFile = headerMatch ? headerMatch[1] : line.slice(fileHeaderPrefix.length);
    }

    // Hard cap: split mid-file rather than exceed the limit.
    if (pendingSize > 0 && pendingSize + lineSize > maxCharsPerChunk) {
      flush();
    }

    // Files appear sequentially, so comparing with the last recorded name is enough.
    if (activeFile && pendingFiles[pendingFiles.length - 1] !== activeFile) {
      pendingFiles.push(activeFile);
    }
    pendingLines.push(line);
    pendingSize += lineSize;
  }

  flush();
  return fileChunks;
}
64+
65+
/**
 * Create a summary prompt for extremely large diffs.
 *
 * The prompt asks for a 2-3 sentence high-level summary and appends the diff
 * verbatim after the instructions.
 *
 * @param {string} diffContent - Diff text to embed in the prompt.
 * @returns {string} Prompt text.
 */
function createSummaryPrompt(diffContent) {
  return `Analyze this git diff and provide a high-level summary. Focus on:
1. What types of files were changed (e.g., source code, tests, config, docs)
2. The overall scope of changes (e.g., new feature, bug fix, refactor)
3. Any major architectural changes or new dependencies

Keep the summary to 2-3 sentences maximum.

Git diff:
${diffContent}`;
}
79+
80+
/**
 * Create the main PR description prompt.
 *
 * The prompt fixes the output format (Description / Changes / Verification
 * sections) and appends the diff verbatim after the instructions.
 *
 * @param {string} diffContent - Diff text to embed in the prompt.
 * @returns {string} Prompt text.
 */
function createPRPrompt(diffContent) {
  return `Write a concise pull request description based on the git diff. Use this exact format:

## Description
Brief summary of changes (1-2 sentences max).

## Changes
- [ ] Key change 1
- [ ] Key change 2
- [ ] Key change 3 (max 5 items)

## Verification
- [ ] Test step 1
- [ ] Test step 2
- [ ] Test step 3 (max 3 items)

Keep it concise and focused on the most important changes.

Here is the git diff:

${diffContent}`;
}
105+
106+
/**
 * Call the Gemini `generateContent` endpoint with the given prompt and return
 * the generated text of the first candidate.
 *
 * The API key is sent in the `x-goog-api-key` header rather than the query
 * string so it cannot leak through URLs in logs or error output. All text
 * parts of the first candidate are concatenated. A warning is written to
 * stderr when the response was cut off by the output token limit.
 *
 * @param {string} prompt - Prompt text, sent as a single content part.
 * @param {string} apiKey - Gemini API key.
 * @returns {Promise<string>} Generated text.
 * @throws {Error} If the HTTP status is not OK, the response has no candidate,
 *   or the first candidate carries no text.
 */
async function callGeminiAPI(prompt, apiKey) {
  const endpoint = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent';

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      contents: [{
        parts: [{
          text: prompt,
        }],
      }],
      generationConfig: {
        temperature: 0.7,
        topK: 40,
        topP: 0.95,
        maxOutputTokens: 2048,
      },
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Gemini API request failed with status ${response.status}: ${errorText}`);
  }

  const json = await response.json();
  const candidate = json?.candidates?.[0];

  if (!candidate) {
    throw new Error('Invalid response from Gemini API');
  }

  const parts = Array.isArray(candidate.content?.parts) ? candidate.content.parts : [];
  const text = parts
    .filter((part) => typeof part?.text === 'string' && !part.thought)
    .map((part) => part.text)
    .join('');

  if (!text) {
    // finishReason (e.g. MAX_TOKENS, SAFETY) is the most useful hint for an empty candidate.
    const reason = candidate.finishReason ? ` (finishReason: ${candidate.finishReason})` : '';
    throw new Error(`Invalid response structure from Gemini API - missing content${reason}`);
  }

  if (candidate.finishReason === 'MAX_TOKENS') {
    // Keep the partial text, but tell the operator it is incomplete.
    console.error('Warning: Response was truncated due to token limit. Consider reducing diff size or using more specific ignore-files.');
  }

  return text;
}
145+
146+
/**
 * Process diff chunks and combine the per-chunk results into one PR description.
 *
 * A single chunk is sent as an ordinary PR prompt. With several chunks, at most
 * MAX_CHUNKS are processed one after another; a chunk whose request fails is
 * logged and skipped (best effort). The surviving per-chunk descriptions are
 * then merged by one final request.
 *
 * @param {Array<{content: string, file: string}>} chunks - Chunks to process, in diff order.
 * @param {string} apiKey - Gemini API key, forwarded to callGeminiAPI.
 * @returns {Promise<string>} The generated PR description.
 * @throws {Error} If no chunk could be processed, or if the single-chunk or
 *   combining request fails.
 */
async function processChunks(chunks, apiKey) {
  if (chunks.length === 1) {
    // Single chunk, process normally
    return callGeminiAPI(createPRPrompt(chunks[0].content), apiKey);
  }

  // Multiple chunks - process each and combine
  const limit = Math.min(chunks.length, MAX_CHUNKS);
  const chunkResults = [];

  for (const [index, chunk] of chunks.slice(0, limit).entries()) {
    // Same fallback label for the log line and the combined prompt.
    const label = chunk.file || 'unknown file';
    console.error(`Processing chunk ${index + 1}/${limit} (${label})`);

    try {
      const result = await callGeminiAPI(createPRPrompt(chunk.content), apiKey);
      chunkResults.push({ file: label, result });
    } catch (error) {
      // Deliberate best effort: continue with the other chunks.
      console.error(`Warning: Failed to process chunk ${index + 1}: ${error.message}`);
    }
  }

  if (chunkResults.length === 0) {
    throw new Error('Failed to process any chunks');
  }

  // Combine results from multiple chunks
  const sections = chunkResults
    .map((entry, index) => `## Chunk ${index + 1} (${entry.file}):\n${entry.result}`)
    .join('\n\n');

  const combinedPrompt = `Combine these pull request descriptions into a single, coherent PR description. Use the same format:

${sections}

Create a unified description that captures the overall changes across all files.`;

  return callGeminiAPI(combinedPrompt, apiKey);
}
188+
6189
(async () => {
7190
const [, , diffFile] = process.argv;
8191
if (!diffFile) {
@@ -21,8 +204,25 @@ const path = require('path');
21204
process.exit(1);
22205
}
23206

24-
// Create prompt for PR description generation
25-
const promptTemplate = `Write a concise pull request description based on the git diff. Use this exact format:
207+
const diffContent = fs.readFileSync(diffFile, 'utf8');
208+
const estimatedTokens = estimateTokens(diffContent);
209+
210+
console.error(`Diff size: ${diffContent.length} characters (~${estimatedTokens} tokens)`);
211+
212+
try {
213+
let result;
214+
215+
if (estimatedTokens > MAX_TOKENS_PER_REQUEST) {
216+
console.error('Large diff detected, using chunking strategy...');
217+
218+
// For extremely large diffs, first try to summarize
219+
if (estimatedTokens > MAX_TOKENS_PER_REQUEST * 5) {
220+
console.error('Extremely large diff detected, using summary approach...');
221+
const summaryPrompt = createSummaryPrompt(diffContent);
222+
result = await callGeminiAPI(summaryPrompt, apiKey);
223+
224+
// Create a simplified PR description based on the summary
225+
const prPrompt = `Based on this summary of changes, create a pull request description using this format:
26226
27227
## Description
28228
Brief summary of changes (1-2 sentences max).
@@ -37,64 +237,25 @@ Brief summary of changes (1-2 sentences max).
37237
- [ ] Test step 2
38238
- [ ] Test step 3 (max 3 items)
39239
40-
Keep it concise and focused on the most important changes.`;
41-
42-
const diffContent = fs.readFileSync(diffFile, 'utf8');
43-
const combinedPrompt = `${promptTemplate}\n\nHere is the git diff:\n\n${diffContent}`;
44-
45-
try {
46-
const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`, {
47-
method: 'POST',
48-
headers: { 'Content-Type': 'application/json' },
49-
body: JSON.stringify({
50-
contents: [{
51-
parts: [{
52-
text: combinedPrompt
53-
}]
54-
}],
55-
generationConfig: {
56-
temperature: 0.7,
57-
topK: 40,
58-
topP: 0.95,
59-
maxOutputTokens: 1024,
240+
Summary: ${result}`;
241+
242+
result = await callGeminiAPI(prPrompt, apiKey);
243+
} else {
244+
// Chunk the diff and process
245+
const chunks = chunkDiffByFiles(diffContent);
246+
console.error(`Split diff into ${chunks.length} chunks`);
247+
248+
if (chunks.length > MAX_CHUNKS) {
249+
console.error(`Warning: Too many chunks (${chunks.length}), processing first ${MAX_CHUNKS} chunks only`);
60250
}
61-
})
62-
});
63-
64-
if (!response.ok) {
65-
const errorText = await response.text();
66-
console.error(`Error: Gemini API request failed with status ${response.status}`);
67-
console.error(`Response: ${errorText}`);
68-
process.exit(1);
69-
}
70-
71-
const json = await response.json();
72-
73-
if (!json.candidates || !json.candidates[0]) {
74-
console.error('Error: Invalid response from Gemini API');
75-
console.error(JSON.stringify(json, null, 2));
76-
process.exit(1);
77-
}
78-
79-
// Check if response was truncated due to max tokens
80-
if (json.candidates[0].finishReason === 'MAX_TOKENS') {
81-
console.error('Warning: Response was truncated due to token limit. Consider reducing diff size or using more specific ignore-files.');
82-
// Continue processing the partial response
83-
}
84-
85-
if (!json.candidates[0].content) {
86-
console.error('Error: No content in API response');
87-
console.error(JSON.stringify(json, null, 2));
88-
process.exit(1);
89-
}
90-
91-
if (!json.candidates[0].content.parts || !json.candidates[0].content.parts[0] || !json.candidates[0].content.parts[0].text) {
92-
console.error('Error: Invalid response structure from Gemini API - missing parts or text');
93-
console.error(JSON.stringify(json, null, 2));
94-
process.exit(1);
251+
252+
result = await processChunks(chunks, apiKey);
253+
}
254+
} else {
255+
// Small diff, process normally
256+
result = await callGeminiAPI(createPRPrompt(diffContent), apiKey);
95257
}
96258

97-
const result = json.candidates[0].content.parts[0].text;
98259
process.stdout.write(result);
99260
} catch (error) {
100261
console.error(`Error: Failed to generate pull request description: ${error.message}`);

0 commit comments

Comments
 (0)