33import os
44import json
55import tiktoken
6+ import argparse
67
78# Tokenizer function using OpenAI's tiktoken for LLMs (GPT-3/4)
89def count_tokens (text , model ):
@@ -42,13 +43,37 @@ def load_config(config_file_name="config.json"):
4243 print (f"Config file '{ config_file_name } ' not found in working directory or script directory." )
4344 sys .exit (1 )
4445
46+ # Function to get the latest commit hash for the current branch
def get_latest_commit():
    """Return the hash of the commit that HEAD currently resolves to.

    Runs ``git rev-parse HEAD`` in the current working directory.

    Returns:
        The hash printed by git, with surrounding whitespace stripped.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` when git exits
        with a non-zero status, or when the git executable cannot be
        started at all (e.g. it is not on PATH).
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            capture_output=True, text=True, check=True,
            encoding="utf-8", errors="replace",
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting the latest commit: {e}")
        # stderr was captured by capture_output=True, so git's own
        # diagnostic would be lost unless it is echoed here.
        detail = (e.stderr or "").strip()
        if detail:
            print(detail)
        sys.exit(1)
    except OSError as e:
        # Raised when the process cannot be launched at all, e.g.
        # FileNotFoundError if git is not installed.
        print(f"Error getting the latest commit: {e}")
        sys.exit(1)
    return result.stdout.strip()
57+
58+ # Function to get the latest commit hash from a specified branch
def get_latest_commit_from_branch(branch):
    """Return the hash of the commit that *branch* resolves to.

    Runs ``git rev-parse --verify <branch>^{commit}`` in the current
    working directory. ``--verify`` makes git insist on exactly one
    revision, and the ``^{commit}`` peel makes it fail unless that revision
    is (or dereferences to) a commit.

    Args:
        branch: Name of the branch (or any other revision git can resolve).

    Returns:
        The commit hash printed by git, with surrounding whitespace stripped.

    Exits:
        Prints an error message and calls ``sys.exit(1)`` when git exits
        with a non-zero status, or when the git executable cannot be
        started at all (e.g. it is not on PATH).
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--verify", f"{branch}^{{commit}}"],
            capture_output=True, text=True, check=True,
            encoding="utf-8", errors="replace",
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting the latest commit from branch '{branch}': {e}")
        # stderr was captured by capture_output=True, so git's own
        # diagnostic would be lost unless it is echoed here.
        detail = (e.stderr or "").strip()
        if detail:
            print(detail)
        sys.exit(1)
    except OSError as e:
        # Raised when the process cannot be launched at all, e.g.
        # FileNotFoundError if git is not installed.
        print(f"Error getting the latest commit from branch '{branch}': {e}")
        sys.exit(1)
    return result.stdout.strip()
69+
4570# Main function to generate the combined diff and calculate token count
4671def main (commit1 , commit2 , output_file ):
4772 # Load the config from the default or specified path
4873 config = load_config ()
4974
5075 # Extract tiktoken model and diff configs from the config
51- tiktoken_model = config .get ("tiktoken_model" , "gpt-4 " )
76+ tiktoken_model = config .get ("tiktoken_model" , "gpt-4o " )
5277 diff_configs = config ["diffs" ]
5378
5479 combined_diff = ""
@@ -72,13 +97,30 @@ def main(commit1, commit2, output_file):
7297
7398# Entry point of the script
7499if __name__ == "__main__" :
75- if len (sys .argv ) != 4 :
76- print ("Usage: python gitdiff4llm.py <commit1> <commit2> <output_file>" )
77- sys .exit (1 )
100+ # Set up argument parser
101+ parser = argparse .ArgumentParser (description = "Run git diff between two commits and analyze with LLM." )
102+ parser .add_argument ("-o" , "--output_file" , required = True , help = "The file to output the combined diff." )
103+ parser .add_argument ("-c1" , "--commit1" , help = "The first commit hash." )
104+ parser .add_argument ("-c2" , "--commit2" , help = "The second commit hash." )
105+ parser .add_argument ("-b" , "--branch" , help = "Compare the latest commit on the current branch to the latest commit on another branch (e.g., master)." )
106+
107+ args = parser .parse_args ()
108+
109+ # Determine the commit hashes
110+ if args .branch :
111+ commit1 = get_latest_commit_from_branch (args .branch )
112+ commit2 = get_latest_commit ()
113+
114+ # Print the commits being used for the comparison
115+ print (f"Comparing latest commit from branch '{ args .branch } ' ({ commit1 [:12 ]} ) with the latest commit on the current branch ({ commit2 [:12 ]} )." )
116+ else :
117+ if not args .commit1 or not args .commit2 :
118+ print ("You must either provide two commit hashes using --commit1 and --commit2, or use the -b option to compare against another branch." )
119+ sys .exit (1 )
120+ commit1 = args .commit1
121+ commit2 = args .commit2
78122
79- commit1 = sys .argv [1 ]
80- commit2 = sys .argv [2 ]
81- output_file = sys .argv [3 ]
123+ output_file = args .output_file
82124
83125 # Make sure the output directory exists
84126 os .makedirs (os .path .dirname (output_file ), exist_ok = True )
0 commit comments