@@ -66,7 +66,7 @@ def passk(df: pd.DataFrame, k: int) -> pd.DataFrame:
6666 agg [f"pass@{ k } " ] = agg .apply (lambda x : _passk (x ["total_runs" ], x ["valid_count" ], k ), axis = 1 )
6767 return agg .groupby (["parallelism_model" , "problem_type" ]).agg ({f"pass@{ k } " : "mean" })
6868
69- def _speedupk (runtimes : Union [pd .Series , np .ndarray ], baseline_runtime : float , k : int ) -> float :
69+ def _speedupk (runtimes : Union [pd .Series , np .ndarray ], baseline_runtime : float , k : int , col_name : str = 'speedup@{}' ) -> float :
7070 """ Compute the speedup@k metric """
7171 # create a copy of the runtimes
7272 if isinstance (runtimes , pd .Series ):
@@ -84,7 +84,7 @@ def _speedupk(runtimes: Union[pd.Series, np.ndarray], baseline_runtime: float, k
8484 num = nCr (j - 1 , k - 1 ) * baseline_runtime
8585 den = nCr (num_samples , k ) * max (runtimes [j - 1 ], 1e-8 )
8686 sum += num / den
87- return pd .Series ({f"speedup@ { k } " : sum })
87+ return pd .Series ({col_name . format ( k ) : sum })
8888
8989def speedupk (df : pd .DataFrame , k : int , n : int ) -> pd .DataFrame :
9090 """ Compute the speedup@k metric """
@@ -116,7 +116,41 @@ def speedupk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
116116
117117 return df
118118
119- def _efficiencyk (runtimes : Union [pd .Series , np .ndarray ], baseline_runtime : float , k : int , n_resources : Union [pd .Series , np .ndarray ]) -> float :
119+
120+
def speedupk_max(df: pd.DataFrame, k: int) -> pd.DataFrame:
    """ Compute the speedup_max@k metric.

    Same as speedup_n@k, but instead of a fixed n every generated output is
    scored with the smallest runtime found among its valid runs (i.e. the n
    that gives the max speedup).

    Args:
        df: one row per run; the columns read here are ``is_valid``, ``name``,
            ``parallelism_model``, ``problem_type``, ``output_idx``,
            ``run_idx``, ``runtime`` and ``best_sequential_runtime``.
            A ``prompt`` column is dropped when present.
        k: the k in speedup_max@k.

    Returns:
        A frame indexed by (parallelism_model, problem_type) with a single
        ``speedup_max@{k}`` column holding the mean over problems. The frame
        is empty when ``df`` contains no valid run with ``run_idx`` 0.
    """
    metric = f"speedup_max@{k}"
    sample_keys = ["name", "parallelism_model", "output_idx"]

    # keep only valid submissions; `== True` is an element-wise pandas
    # comparison, and the explicit copy lets us assign columns below without
    # writing into a view of the caller's frame
    df = df[df["is_valid"] == True].copy()

    # the prompt text is never used here; tolerate frames that lack it
    df = df.drop(columns=["prompt"], errors="ignore")

    # choose the min across processor counts, and use the min
    # best_sequential_runtime, for every generated output
    per_sample = df.groupby(sample_keys)
    fastest_runtime = per_sample["runtime"].transform("min")
    fastest_baseline = per_sample["best_sequential_runtime"].transform("min")
    df["runtime"] = fastest_runtime
    df["best_sequential_runtime"] = fastest_baseline

    # select only run_idx 0
    df = df[df["run_idx"].astype(int) == 0]

    # After the transforms above every remaining row of an output carries the
    # same runtime, one row per processor configuration. Keep a single row per
    # output so that _speedupk sees each sample exactly once; duplicated
    # samples would inflate the sample count it works with.
    df = df.drop_duplicates(subset=sample_keys + ["problem_type"])

    if df.empty:
        # nothing valid to score: return an empty frame of the usual shape
        # instead of failing on the missing metric column further down
        no_groups = pd.MultiIndex.from_arrays(
            [[], []], names=["parallelism_model", "problem_type"]
        )
        return pd.DataFrame(columns=[metric], index=no_groups, dtype=float)

    # group by name, parallelism_model, and problem_type and call _speedupk;
    # only the columns the callback reads are handed to apply
    df = df.groupby(["name", "parallelism_model", "problem_type"])[
        ["runtime", "best_sequential_runtime"]
    ].apply(
        lambda rows: _speedupk(
            rows["runtime"],
            np.min(rows["best_sequential_runtime"]),
            k,
            col_name="speedup_max@{}",
        )
    ).reset_index()

    # compute the mean speedup_max@k
    df = df.groupby(["parallelism_model", "problem_type"]).agg({metric: "mean"})

    return df
150+
151+
152+
153+ def _efficiencyk (runtimes : Union [pd .Series , np .ndarray ], baseline_runtime : float , k : int , n_resources : Union [pd .Series , np .ndarray ], col_name : str = 'efficiency@{}' ) -> float :
120154 """ Compute the efficiency@k metric """
121155 # create a copy of the runtimes
122156 if isinstance (runtimes , pd .Series ):
@@ -139,7 +173,7 @@ def _efficiencyk(runtimes: Union[pd.Series, np.ndarray], baseline_runtime: float
139173 num = nCr (j - 1 , k - 1 ) * baseline_runtime
140174 den = nCr (num_samples , k ) * max (runtimes [j - 1 ], 1e-8 ) * n_resources [j - 1 ]
141175 sum += num / den
142- return pd .Series ({f"efficiency@ { k } " : sum })
176+ return pd .Series ({col_name . format ( k ) : sum })
143177
144178def efficiencyk (df : pd .DataFrame , k : int , n : int ) -> pd .DataFrame :
145179 """ Compute the efficiency@k metric """
@@ -150,7 +184,7 @@ def efficiencyk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
150184
151185 # choose processor count; hardcoded right now
152186 df = df [(df ["parallelism_model" ] == "serial" ) |
153- (df ["parallelism_model" ] == "cuda" ) |
187+ (df ["parallelism_model" ] == "cuda" ) |
154188 (df ["parallelism_model" ] == "hip" ) |
155189 ((df ["parallelism_model" ] == "kokkos" ) & (df ["num_threads" ] == 32 )) |
156190 ((df ["parallelism_model" ] == "omp" ) & (df ["num_threads" ] == 32 )) |
@@ -163,7 +197,7 @@ def efficiencyk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
163197 df .loc [df ["parallelism_model" ] == "cuda" , "n_resources" ] = df ["problem_size" ]
164198 df .loc [df ["parallelism_model" ] == "hip" , "n_resources" ] = df ["problem_size" ]
165199 df .loc [df ["parallelism_model" ] == "kokkos" , "n_resources" ] = 32
166- df .loc [df ["parallelism_model" ] == "omp" , "n_resources" ] = 32
200+ df .loc [df ["parallelism_model" ] == "omp" , "n_resources" ] = 8
167201 df .loc [df ["parallelism_model" ] == "mpi" , "n_resources" ] = 512
168202 df .loc [df ["parallelism_model" ] == "mpi+omp" , "n_resources" ] = 4 * 64
169203
@@ -182,6 +216,41 @@ def efficiencyk(df: pd.DataFrame, k: int, n: int) -> pd.DataFrame:
182216
183217 return df
184218
219+
def efficiencyk_max(df: pd.DataFrame, k: int) -> pd.DataFrame:
    """ Compute the efficiency_max@k metric.

    For every generated output the run with the smallest
    ``runtime * n_resources`` product is selected, and the selected runs are
    passed to ``_efficiencyk``.

    Args:
        df: one row per run; the columns read here are ``is_valid``, ``name``,
            ``parallelism_model``, ``problem_type``, ``output_idx``,
            ``runtime``, ``best_sequential_runtime``, ``problem_size``,
            ``num_threads`` and ``num_procs``.
        k: the k in efficiency_max@k.

    Returns:
        A frame indexed by (parallelism_model, problem_type) with a single
        ``efficiency_max@{k}`` column holding the mean over problems. The
        frame is empty when ``df`` contains no usable valid run.
    """
    metric = f"efficiency_max@{k}"
    sample_keys = ["name", "parallelism_model", "output_idx"]

    # get all runs where is_valid is true; copy so the column assignments
    # below never write into a view, and renumber the rows so that every index
    # label is unique (needed for the idxmin/loc selection further down)
    df = df[df["is_valid"] == True].copy().reset_index(drop=True)

    # set n_resources column (1 for every model not listed below)
    model = df["parallelism_model"]
    df["n_resources"] = 1
    df.loc[model == "cuda", "n_resources"] = df["problem_size"]
    df.loc[model == "hip", "n_resources"] = df["problem_size"]
    df.loc[model == "kokkos", "n_resources"] = df["num_threads"]
    df.loc[model == "omp", "n_resources"] = df["num_threads"]
    df.loc[model == "mpi", "n_resources"] = df["num_procs"]
    df.loc[model == "mpi+omp", "n_resources"] = df["num_procs"] * df["num_threads"]

    # use the min best_sequential_runtime; this has to happen while all runs
    # of an output are still present, otherwise the min is taken over one row
    df["best_sequential_runtime"] = (
        df.groupby(sample_keys)["best_sequential_runtime"].transform("min")
    )

    # choose the row with min n_resources * runtime for every output; rows
    # whose cost is undefined cannot be ranked and are left out
    df["_cost"] = df["runtime"] * df["n_resources"]
    df = df.dropna(subset=["_cost"])

    if df.empty:
        # nothing valid to score: return an empty frame of the usual shape
        # instead of failing on the missing metric column further down
        no_groups = pd.MultiIndex.from_arrays(
            [[], []], names=["parallelism_model", "problem_type"]
        )
        return pd.DataFrame(columns=[metric], index=no_groups, dtype=float)

    # idxmin keeps the first row on ties and, unlike returning whole rows from
    # groupby.apply, preserves the column dtypes of the selected rows
    cheapest = df.groupby(sample_keys)["_cost"].idxmin()
    df = df.loc[cheapest].drop(columns="_cost").reset_index(drop=True)

    # group by name, parallelism_model, and problem_type and call
    # _efficiencyk; only the columns the callback reads are handed to apply
    df = df.groupby(["name", "parallelism_model", "problem_type"])[
        ["runtime", "best_sequential_runtime", "n_resources"]
    ].apply(
        lambda rows: _efficiencyk(
            rows["runtime"],
            np.min(rows["best_sequential_runtime"]),
            k,
            rows["n_resources"],
            col_name="efficiency_max@{}",
        )
    ).reset_index()

    # compute the mean efficiency_max@k
    df = df.groupby(["parallelism_model", "problem_type"]).agg({metric: "mean"})

    return df
253+
185254def parse_problem_size (problem_size : str ) -> int :
186255 """ problem size is of format '(1<<n)' """
187256 num = problem_size .split ("<<" )[1 ][:- 1 ]
@@ -216,8 +285,10 @@ def main():
216285 build_values = buildk (df , k )
217286 pass_values = passk (valid_runs , k )
218287 speedup_values = speedupk (df , k , args .n )
288+ speedup_max_values = speedupk_max (df , k )
219289 efficiency_values = efficiencyk (df , k , args .n )
220- all_results .extend ([build_values , pass_values , speedup_values , efficiency_values ])
290+ efficiency_max_values = efficiencyk_max (df , k )
291+ all_results .extend ([build_values , pass_values , speedup_values , speedup_max_values , efficiency_values , efficiency_max_values ])
221292
222293 # merge all_results; each df has one column and the same index
223294 # build a new df with all the columns and the same index
@@ -227,7 +298,9 @@ def main():
227298 # replace NaN speedup@k values with 0.0
228299 for k in args .k :
229300 merged_df [f"speedup@{ k } " ] = merged_df [f"speedup@{ k } " ].fillna (0.0 )
301+ merged_df [f"speedup_max@{ k } " ] = merged_df [f"speedup_max@{ k } " ].fillna (0.0 )
230302 merged_df [f"efficiency@{ k } " ] = merged_df [f"efficiency@{ k } " ].fillna (0.0 )
303+ merged_df [f"efficiency_max@{ k } " ] = merged_df [f"efficiency_max@{ k } " ].fillna (0.0 )
231304
232305 # add model name column
233306 if args .model_name :
0 commit comments