22"""
# std imports
import argparse
import json
from math import comb
from typing import Union

# tp imports
import numpy as np
import pandas as pd
78
def get_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with:
          input_csv      -- path to the CSV of test-case run results (required)
          k              -- list of ints; each k yields build@k/pass@k/speedup@k columns
          n              -- int, N value used by the speedup@k/efficiency@k computations
          output         -- optional path for the results CSV (prints to stdout if absent)
          problem_sizes  -- path to a JSON mapping problem -> model -> size expression
          model_name     -- optional value for a leading "model" column
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("input_csv", type=str, help="Input CSV file containing the test cases.")
    # multiple k values are accepted so all metrics are computed in one pass
    parser.add_argument("-k", "--k", type=int, nargs='+', default=[1, 5, 10, 20],
                        help="K value for pass@k, build@k, and speedup@k.")
    parser.add_argument("-n", "--n", type=int, default=1, help="N value for speedup@k.")
    parser.add_argument("-o", "--output", type=str, help="Output csv file containing the results.")
    parser.add_argument("--problem-sizes", type=str, default='../drivers/problem-sizes.json',
                        help="Json with problem sizes. Used for calculating GPU efficiency.")
    parser.add_argument("--model-name", type=str, help="Add model name column with this value")
    return parser.parse_args()
2123
def get_correctness_df(df: pd.DataFrame) -> pd.DataFrame:
    """ Group by name, parallelism_model, and output_idx, and set is_valid to true only if all rows in the group have is_valid = true.
        Set it to false otherwise.

    Returns a DataFrame with one row per (name, parallelism_model, output_idx)
    and columns is_valid and problem_type.
    """
    # work on a copy so the caller's frame is never mutated
    df = df.copy()

    # group all the runs for this LLM output
    agg = df.groupby(["name", "parallelism_model", "output_idx"]).agg({"is_valid": ["count", "sum"]})
    agg.columns = ["count", "sum"]

    # mark as valid only if all runs are valid (sum of True values equals row count)
    agg["is_valid"] = agg["count"] == agg["sum"]
    agg = agg.reset_index()
    agg = agg.drop(columns=["count", "sum"])

    # add problem_type column from df; each name maps to one problem_type
    agg = agg.merge(df[["name", "problem_type"]].drop_duplicates(), on="name", how="left")

    return agg
3642
@@ -39,18 +45,26 @@ def nCr(n: int, r: int) -> int:
3945 return 1
4046 return comb (n , r )
4147
def buildk(df: pd.DataFrame, k: int) -> pd.DataFrame:
    """ Compute the build@k metric.

    Counts did_build successes per (name, parallelism_model, problem_type),
    applies the unbiased pass@k estimator to them, and averages over problems.
    Returns a DataFrame indexed by (parallelism_model, problem_type) with a
    single f"build@{k}" column.
    """
    agg = df.groupby(["name", "parallelism_model", "problem_type"]).agg({"did_build": ["count", "sum"]})
    agg.columns = ["total_build_attempts", "successful_builds"]
    agg = agg.reset_index()
    # reuse the pass@k estimator with "built" standing in for "correct"
    agg[f"build@{k}"] = agg.apply(lambda x: _passk(x["total_build_attempts"], x["successful_builds"], k), axis=1)
    return agg.groupby(["parallelism_model", "problem_type"]).agg({f"build@{k}": "mean"})
55+
4256def _passk (num_samples : int , num_correct : int , k : int ) -> float :
4357 if num_samples - num_correct < k :
4458 return 1.0
4559 return 1.0 - np .prod (1.0 - k / np .arange (num_samples - num_correct + 1 , num_samples + 1 ))
4660
def passk(df: pd.DataFrame, k: int) -> pd.DataFrame:
    """ Compute the pass@k metric.

    Expects one row per (name, parallelism_model, output_idx) with a boolean
    is_valid column (see get_correctness_df). Returns a DataFrame indexed by
    (parallelism_model, problem_type) with a single f"pass@{k}" column.
    """
    agg = df.groupby(["name", "parallelism_model", "problem_type"]).agg({"is_valid": ["count", "sum"]})
    agg.columns = ["total_runs", "valid_count"]
    agg = agg.reset_index()
    agg[f"pass@{k}"] = agg.apply(lambda x: _passk(x["total_runs"], x["valid_count"], k), axis=1)
    return agg.groupby(["parallelism_model", "problem_type"]).agg({f"pass@{k}": "mean"})
5468
def _speedupk(runtimes: Union[pd.Series, np.ndarray], baseline_runtime: float, k: int) -> float:
    """ Compute the speedup@k metric: expected value of baseline_runtime divided
    by the best (minimum) runtime among k samples drawn from `runtimes`.

    NOTE(review): the setup portion of this function was hidden in the diff
    view; it is reconstructed here by direct analogy with _efficiencyk below
    (copy, sort, then the closed-form expectation) — confirm against the
    original file.
    """
    # create a copy of the runtimes so sorting does not mutate the caller's data
    if isinstance(runtimes, pd.Series):
        runtimes = runtimes.values.copy()
    else:
        runtimes = runtimes.copy()

    # sort the runtimes; runtimes[j-1] is then the j-th smallest
    runtimes.sort()

    # compute expected value: P(j-th smallest is the best of k draws) weights
    sum = 0.0
    num_samples = runtimes.shape[0]
    for j in range(1, num_samples + 1):
        num = nCr(j - 1, k - 1) * baseline_runtime
        # clamp tiny runtimes to avoid division by zero
        den = nCr(num_samples, k) * max(runtimes[j - 1], 1e-8)
        sum += num / den
    return pd.Series({f"speedup@{k}": sum})
7488
def speedupk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
    """ Compute the speedup@k metric.

    Filters to valid runs at one hardcoded resource configuration per model,
    computes expected best-of-k speedup per (name, parallelism_model,
    problem_type), and averages per (parallelism_model, problem_type).
    """
    df = df.copy()

    # get all runs where is_valid is true
    df = df[df["is_valid"] == True]

    # choose processor count; hardcoded right now
    # NOTE(review): the first three alternatives below were hidden in the diff
    # view; reconstructed from the identical filter in efficiencyk — confirm.
    df = df[(df["parallelism_model"] == "serial") |
        (df["parallelism_model"] == "cuda") |
        (df["parallelism_model"] == "hip") |
        ((df["parallelism_model"] == "kokkos") & (df["num_threads"] == 32)) |
        ((df["parallelism_model"] == "omp") & (df["num_threads"] == 64)) |
        ((df["parallelism_model"] == "mpi") & (df["num_procs"] == 512)) |
        ((df["parallelism_model"] == "mpi+omp") & (df["num_procs"] == 4) & (df["num_threads"] == 64))]
    df = df.copy()

    # use min best_sequential_runtime
    df["best_sequential_runtime"] = df.groupby(["name", "parallelism_model", "output_idx"])["best_sequential_runtime"].transform("min")

    # group by name, parallelism_model, and problem_type and call _speedupk
    df = df.groupby(["name", "parallelism_model", "problem_type"]).apply(
        lambda row: _speedupk(row["runtime"], np.min(row["best_sequential_runtime"]), k)
    ).reset_index()

    # compute the mean speedup@k
    df = df.groupby(["parallelism_model", "problem_type"]).agg({f"speedup@{k}": "mean"})

    return df
118+
def _efficiencyk(runtimes: Union[pd.Series, np.ndarray], baseline_runtime: float, k: int, n_resources: Union[pd.Series, np.ndarray]) -> float:
    """ Compute the efficiency@k metric: the speedup@k expectation with each
    term additionally divided by the resource count used by that sample.
    """
    # create a copy of the runtimes so sorting does not mutate the caller's data
    if isinstance(runtimes, pd.Series):
        runtimes = runtimes.values.copy()
    else:
        runtimes = runtimes.copy()

    if isinstance(n_resources, pd.Series):
        n_resources = n_resources.values.copy()
    else:
        n_resources = n_resources.copy()

    # sort the runtimes; runtimes[j-1] is then the j-th smallest
    runtimes.sort()

    # compute expected value
    sum = 0.0
    num_samples = runtimes.shape[0]
    for j in range(1, num_samples + 1):
        num = nCr(j - 1, k - 1) * baseline_runtime
        # clamp tiny runtimes to avoid division by zero
        den = nCr(num_samples, k) * max(runtimes[j - 1], 1e-8) * n_resources[j - 1]
        sum += num / den
    return pd.Series({f"efficiency@{k}": sum})
143+
def efficiencyk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
    """ Compute the efficiency@k metric.

    Same pipeline as speedupk, but each run is weighted by the number of
    resources it consumed (threads/procs, or problem_size for GPU models).
    """
    df = df.copy()

    # get all runs where is_valid is true
    df = df[df["is_valid"] == True]

    # choose processor count; hardcoded right now
    df = df[(df["parallelism_model"] == "serial") |
        (df["parallelism_model"] == "cuda") |
        (df["parallelism_model"] == "hip") |
        ((df["parallelism_model"] == "kokkos") & (df["num_threads"] == 32)) |
        ((df["parallelism_model"] == "omp") & (df["num_threads"] == 64)) |
        ((df["parallelism_model"] == "mpi") & (df["num_procs"] == 512)) |
        ((df["parallelism_model"] == "mpi+omp") & (df["num_procs"] == 4) & (df["num_threads"] == 64))]

    # set n_resources column to 1 for serial; 32 for kokkos; 64 for omp; 512 for mpi; 4*64 for mpi+omp;
    # set it to problem_size for cuda and hip
    df["n_resources"] = 1
    df.loc[df["parallelism_model"] == "cuda", "n_resources"] = df["problem_size"]
    df.loc[df["parallelism_model"] == "hip", "n_resources"] = df["problem_size"]
    df.loc[df["parallelism_model"] == "kokkos", "n_resources"] = 32
    df.loc[df["parallelism_model"] == "omp", "n_resources"] = 64
    df.loc[df["parallelism_model"] == "mpi", "n_resources"] = 512
    df.loc[df["parallelism_model"] == "mpi+omp", "n_resources"] = 4 * 64

    df = df.copy()

    # use min best_sequential_runtime
    df["best_sequential_runtime"] = df.groupby(["name", "parallelism_model", "output_idx"])["best_sequential_runtime"].transform("min")

    # group by name, parallelism_model, and problem_type and call _efficiencyk
    df = df.groupby(["name", "parallelism_model", "problem_type"]).apply(
        lambda row: _efficiencyk(row["runtime"], np.min(row["best_sequential_runtime"]), k, row["n_resources"])
    ).reset_index()

    # compute the mean efficiency@k
    df = df.groupby(["parallelism_model", "problem_type"]).agg({f"efficiency@{k}": "mean"})

    return df
102184
def parse_problem_size(problem_size: str) -> int:
    """ problem size is of format '(1<<n)'; return the integer value 2**n. """
    # take everything after '<<' and drop the trailing ')'
    num = problem_size.split("<<")[1][:-1]
    return 2 ** int(num)
189+
def main():
    """Read run results, compute build/pass/speedup/efficiency @k for every
    requested k, and write (or print) the merged results table."""
    args = get_args()

    # read in input
    df = pd.read_csv(args.input_csv)

    # read in problem sizes and attach a problem_size column per (name, model)
    with open(args.problem_sizes, "r") as f:
        problem_sizes = json.load(f)
    for problem in problem_sizes:
        for parallelism_model, problem_size in problem_sizes[problem].items():
            df.loc[(df["name"] == problem) & (df["parallelism_model"] == parallelism_model), "problem_size"] = parse_problem_size(problem_size)

    # remove rows where parallelism_model is kokkos and num_threads is 64
    df = df[~((df["parallelism_model"] == "kokkos") & (df["num_threads"] == 64))]

    # filter/aggregate
    df["did_run"] = df["did_run"].fillna(False)    # if it didn't build, then this will be nan; overwrite
    df["is_valid"] = df["is_valid"].fillna(False)  # if it didn't build, then this will be nan; overwrite

    # get only valid runs
    valid_runs = get_correctness_df(df)

    # get values for each k
    all_results = []
    for k in args.k:
        build_values = buildk(df, k)
        pass_values = passk(valid_runs, k)
        speedup_values = speedupk(df, k, args.n)
        efficiency_values = efficiencyk(df, k, args.n)
        all_results.extend([build_values, pass_values, speedup_values, efficiency_values])

    # merge all_results; each df has one column and the same index
    # build a new df with all the columns and the same index
    merged_df = pd.concat(all_results, axis=1).reset_index()

    # if there were no successful builds or runs, then speedup@k will be nan after merging
    # replace NaN speedup@k values with 0.0
    for k in args.k:
        merged_df[f"speedup@{k}"] = merged_df[f"speedup@{k}"].fillna(0.0)
        merged_df[f"efficiency@{k}"] = merged_df[f"efficiency@{k}"].fillna(0.0)

    # add model name column
    if args.model_name:
        merged_df.insert(0, "model_name", args.model_name)

    # clean up column names
    column_name_map = {
        "model_name": "model",
        "parallelism_model": "execution model",
        "problem_type": "problem type",
    }
    merged_df = merged_df.rename(columns=column_name_map)

    # write to csv, or print the full table when no output path was given
    if args.output:
        merged_df.to_csv(args.output, index=False)
    else:
        pd.set_option('display.max_columns', merged_df.shape[1] + 1)
        pd.set_option('display.max_rows', merged_df.shape[0] + 1)
        print(merged_df)
126252
127253
if __name__ == "__main__":
    main()