11import subprocess
22import sys
33import os
4+ import json
45import tiktoken
56
67# Tokenizer function using OpenAI's tiktoken for LLMs (GPT-3/4)
def count_tokens(text, model):
    """Return the number of tokens *text* occupies for the given model.

    Args:
        text: The string to tokenize.
        model: Model name passed to ``tiktoken.encoding_for_model``.

    Returns:
        The length of the token sequence produced by the model's encoding.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # The model name is user-supplied (config file); tiktoken raises
        # KeyError for names it cannot map to an encoding. Fall back to a
        # widely available encoding instead of crashing with a traceback.
        print(
            f"Warning: unknown tiktoken model '{model}'; "
            "falling back to the 'cl100k_base' encoding.",
            file=sys.stderr,
        )
        encoding = tiktoken.get_encoding("cl100k_base")

    # Diffs are arbitrary text and may contain literals such as
    # "<|endoftext|>". By default encode() raises ValueError on those;
    # disallowed_special=() makes them tokenize as ordinary text.
    return len(encoding.encode(text, disallowed_special=()))
1011
@@ -20,29 +21,50 @@ def run_git_diff(commit1, commit2, diff_options):
2021 print (f"Error running git diff: { e } " )
2122 sys .exit (1 )
2223
24+ # Function to load config (diff options and tiktoken model) from JSON config file
def load_config(config_file_name="config.json"):
    """Load the JSON configuration (diff options and tiktoken model).

    The file is looked up first in the current working directory and, if it
    is not there, in the directory that contains this script.

    Args:
        config_file_name: File name of the JSON config file to look for.

    Returns:
        The parsed configuration as a dict.

    On any failure (file not found, unreadable, not valid UTF-8/JSON, or a
    top-level value that is not a JSON object) an error message is printed
    and the process exits with status 1.
    """
    # First try the current working directory.
    config_path = os.path.join(os.getcwd(), config_file_name)

    # Otherwise fall back to the directory of the script itself. isfile()
    # (rather than exists()) keeps a directory with the same name from
    # shadowing a real config file next to the script.
    if not os.path.isfile(config_path):
        script_dir = os.path.dirname(os.path.realpath(__file__))
        config_path = os.path.join(script_dir, config_file_name)

    if not os.path.isfile(config_path):
        print(f"Config file '{config_file_name}' not found in working directory or script directory.")
        sys.exit(1)

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers a vanished file and permission problems; ValueError
        # covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading config file: {e}")
        sys.exit(1)

    # Callers use dict access on the result, so reject other JSON values
    # here with a clear message instead of failing later.
    if not isinstance(config, dict):
        print(f"Error loading config file: top-level JSON value in '{config_path}' must be an object.")
        sys.exit(1)

    return config
44+
2345# Main function to generate the combined diff and calculate token count
def main(commit1, commit2, output_file):
    """Generate the combined diff, write it to a file and count its tokens.

    Args:
        commit1: First commit passed to ``run_git_diff``.
        commit2: Second commit passed to ``run_git_diff``.
        output_file: Path of the file the combined diff is written to.

    The diff option sets and the tiktoken model name come from the config
    returned by ``load_config()``. If the config has no ``"diffs"`` list, an
    error is printed and the process exits with status 1.
    """
    # Load the config from the default path
    config = load_config()

    # Extract tiktoken model and diff configs from the config
    tiktoken_model = config.get("tiktoken_model", "gpt-4")
    diff_configs = config.get("diffs")
    if not isinstance(diff_configs, list):
        # A missing key would otherwise surface as a bare KeyError, and a
        # non-list value (e.g. a dict or string) would be iterated silently.
        print("Config error: 'diffs' must be a list of git diff option lists.")
        sys.exit(1)

    # Run git diff for each set of options; skip empty/None outputs
    diff_chunks = []
    for diff_options in diff_configs:
        diff_output = run_git_diff(commit1, commit2, diff_options)
        if diff_output:
            diff_chunks.append(diff_output + "\n")
    combined_diff = "".join(diff_chunks)

    # Write the combined diff to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(combined_diff)

    # Calculate token count using the tiktoken model
    token_count = count_tokens(combined_diff, tiktoken_model)

    # Output results
    print(f"Combined diff written to {output_file}")
@@ -51,7 +73,7 @@ def main(commit1, commit2, output_file):
5173# Entry point of the script
5274if __name__ == "__main__" :
5375 if len (sys .argv ) != 4 :
54- print ("Usage: python gitdiff4review .py <commit1> <commit2> <output_file>" )
76+ print ("Usage: python gitdiff4llm .py <commit1> <commit2> <output_file>" )
5577 sys .exit (1 )
5678
5779 commit1 = sys .argv [1 ]
0 commit comments