Skip to content
This repository was archived by the owner on Mar 27, 2026. It is now read-only.

Commit 1629844

Browse files
committed
use config for diff options
1 parent 73f0af6 commit 1629844

2 files changed

Lines changed: 46 additions & 16 deletions

File tree

config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"tiktoken_model": "gpt-4o",
3+
"diffs": [
4+
["-U50", "--ignore-all-space", "--", ":!*Test*"],
5+
["-U20", "--ignore-all-space", "--", "*Test*"]
6+
]
7+
}
8+

gitdiff4llm.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import subprocess
22
import sys
33
import os
4+
import json
45
import tiktoken
56

67
# Tokenizer function using OpenAI's tiktoken for LLMs (GPT-3/4)
7-
def count_tokens(text, model="gpt-4o"):
8+
def count_tokens(text, model):
    """Return the number of tokens in *text* for the given tiktoken model.

    Args:
        text: The string to tokenize.
        model: Model name passed to ``tiktoken.encoding_for_model`` to pick
            the encoding.

    Returns:
        The length of the encoded token sequence.
    """
    # Resolve the model's encoding and count the tokens it yields for the text.
    return len(tiktoken.encoding_for_model(model).encode(text))
1011

@@ -20,29 +21,50 @@ def run_git_diff(commit1, commit2, diff_options):
2021
print(f"Error running git diff: {e}")
2122
sys.exit(1)
2223

24+
# Function to load config (diff options and tiktoken model) from JSON config file
25+
def load_config(config_file_name="config.json"):
    """Load the JSON config (diff options and tiktoken model).

    The file is looked up first in the current working directory and, if no
    regular file of that name exists there, in the directory containing this
    script.

    Args:
        config_file_name: File name of the config file to look for.

    Returns:
        The parsed JSON document as returned by ``json.load``.

    Exits:
        Prints a message and calls ``sys.exit(1)`` when no config file is
        found, or when the file cannot be read, decoded as UTF-8, or parsed
        as JSON.
    """
    # First try to find the config file in the current working directory.
    # isfile (not exists): a directory that happens to share the name must
    # not be selected, otherwise open() fails with an unhandled error.
    config_path = os.path.join(os.getcwd(), config_file_name)

    # If not found there, fall back to the directory of the script itself.
    if not os.path.isfile(config_path):
        script_dir = os.path.dirname(os.path.realpath(__file__))
        config_path = os.path.join(script_dir, config_file_name)

    if not os.path.isfile(config_path):
        print(f"Config file '{config_file_name}' not found in working directory or script directory.")
        sys.exit(1)

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
        # OSError covers a file that vanished or is unreadable (permissions);
        # UnicodeDecodeError covers a file that is not valid UTF-8.
        print(f"Error loading config file: {e}")
        sys.exit(1)
44+
2345
# Main function to generate the combined diff and calculate token count
2446
def main(commit1, commit2, output_file):
25-
# Run git diff with the first set of options
26-
diff1 = run_git_diff(commit1, commit2, ["-U100", "--ignore-all-space", "--", ":!*Test*"])
47+
# Load the config (config.json from the working directory, falling back to the script directory)
48+
config = load_config()
2749

28-
# Run git diff with the second set of options for test files
29-
diff2 = run_git_diff(commit1, commit2, ["-U20", "--ignore-all-space", "--", "*Test*"])
50+
# Extract tiktoken model and diff configs from the config
51+
tiktoken_model = config.get("tiktoken_model", "gpt-4")
52+
diff_configs = config["diffs"]
3053

31-
# Ensure both diffs are valid strings
32-
if diff1 is None:
33-
diff1 = ""
34-
if diff2 is None:
35-
diff2 = ""
36-
37-
# Combine the two diffs
38-
combined_diff = diff1 + "\n" + diff2
54+
combined_diff = ""
55+
56+
# Run git diff for each set of options and combine the results
57+
for diff_options in diff_configs:
58+
diff_output = run_git_diff(commit1, commit2, diff_options)
59+
if diff_output:
60+
combined_diff += diff_output + "\n"
3961

4062
# Write the combined diff to the output file
4163
with open(output_file, 'w', encoding='utf-8') as f:
4264
f.write(combined_diff)
4365

44-
# Calculate token count using LLM tokenizer
45-
token_count = count_tokens(combined_diff)
66+
# Calculate token count using the tiktoken model
67+
token_count = count_tokens(combined_diff, tiktoken_model)
4668

4769
# Output results
4870
print(f"Combined diff written to {output_file}")
@@ -51,7 +73,7 @@ def main(commit1, commit2, output_file):
5173
# Entry point of the script
5274
if __name__ == "__main__":
5375
if len(sys.argv) != 4:
54-
print("Usage: python gitdiff4review.py <commit1> <commit2> <output_file>")
76+
print("Usage: python gitdiff4llm.py <commit1> <commit2> <output_file>")
5577
sys.exit(1)
5678

5779
commit1 = sys.argv[1]

0 commit comments

Comments
 (0)