@@ -46,11 +46,6 @@ using namespace PrintingToolbox;
4646#endif
4747#endif
4848
49- vector<string> Profile_Function_tp; /* !< \brief Vector of string names for profiled functions. */
50- vector<double > Profile_Time_tp; /* !< \brief Vector of elapsed time for profiled functions. */
51- vector<double > Profile_ID_tp; /* !< \brief Vector of group ID number for profiled functions. */
52- map<string, vector<int > > Profile_Map_tp; /* !< \brief Map containing the final results for profiled functions. */
53-
5449map<CLong3T, int > GEMM_Profile_MNK; /* !< \brief Map, which maps the GEMM size to the index where
5550 the data for this GEMM is stored in several vectors. */
5651vector<long > GEMM_Profile_NCalls; /* !< \brief Vector, which stores the number of calls to this
@@ -59,8 +54,6 @@ vector<double> GEMM_Profile_TotTime; /*!< \brief Total time spent for this
5954vector<double > GEMM_Profile_MinTime; /* !< \brief Minimum time spent for this GEMM size. */
6055vector<double > GEMM_Profile_MaxTime; /* !< \brief Maximum time spent for this GEMM size. */
6156
62- // #pragma omp threadprivate(Profile_Function_tp, Profile_Time_tp, Profile_ID_tp, Profile_Map_tp)
63-
6457
6558CConfig::CConfig (char case_filename[MAX_STRING_SIZE], SU2_COMPONENT val_software, bool verb_high) {
6659
@@ -9953,191 +9946,6 @@ short CConfig::FindInterfaceMarker(unsigned short iInterface) const {
99539946 return -1 ;
99549947}
99559948
9956- void CConfig::Tick (double *val_start_time) {
9957-
9958- #ifdef PROFILE
9959- *val_start_time = SU2_MPI::Wtime ();
9960- #endif
9961-
9962- }
9963-
9964- void CConfig::Tock (double val_start_time, const string& val_function_name, int val_group_id) {
9965-
9966- #ifdef PROFILE
9967-
9968- double val_stop_time = 0.0 , val_elapsed_time = 0.0 ;
9969-
9970- val_stop_time = SU2_MPI::Wtime ();
9971-
9972- /* --- Compute the elapsed time for this subroutine ---*/
9973- val_elapsed_time = val_stop_time - val_start_time;
9974-
9975- /* --- Store the subroutine name and the elapsed time ---*/
9976- Profile_Function_tp.push_back (val_function_name);
9977- Profile_Time_tp.push_back (val_elapsed_time);
9978- Profile_ID_tp.push_back (val_group_id);
9979-
9980- #endif
9981-
9982- }
9983-
9984- void CConfig::SetProfilingCSV () {
9985-
9986- #ifdef PROFILE
9987-
9988- int rank = MASTER_NODE;
9989- int size = SINGLE_NODE;
9990- #ifdef HAVE_MPI
9991- SU2_MPI::Comm_rank (SU2_MPI::GetComm (), &rank);
9992- SU2_MPI::Comm_size (SU2_MPI::GetComm (), &size);
9993- #endif
9994-
9995- /* --- Each rank has the same stack trace, so the they have the same
9996- function calls and ordering in the vectors. We're going to reduce
9997- the timings from each rank and extract the avg, min, and max timings. ---*/
9998-
9999- /* --- First, create a local mapping, so that we can extract the
10000- min and max values for each function. ---*/
10001-
10002- for (unsigned int i = 0 ; i < Profile_Function_tp.size (); i++) {
10003-
10004- /* --- Add the function and initialize if not already stored (the ID
10005- only needs to be stored the first time).---*/
10006- if (Profile_Map_tp.find (Profile_Function_tp[i]) == Profile_Map_tp.end ()) {
10007-
10008- vector<int > profile; profile.push_back (i);
10009- Profile_Map_tp.insert (pair<string,vector<int > >(Profile_Function_tp[i],profile));
10010-
10011- } else {
10012-
10013- /* --- This function has already been added, so simply increment the
10014- number of calls and total time for this function. ---*/
10015-
10016- Profile_Map_tp[Profile_Function_tp[i]].push_back (i);
10017-
10018- }
10019- }
10020-
10021- /* --- We now have everything gathered by function name, so we can loop over
10022- each function and store the min/max times. ---*/
10023-
10024- int map_size = 0 ;
10025- for (map<string,vector<int > >::iterator it=Profile_Map_tp.begin (); it!=Profile_Map_tp.end (); ++it) {
10026- map_size++;
10027- }
10028-
10029- /* --- Allocate and initialize memory ---*/
10030-
10031- double *l_min_red = nullptr , *l_max_red = nullptr , *l_tot_red = nullptr , *l_avg_red = nullptr ;
10032- int *n_calls_red = nullptr ;
10033- double * l_min = new double [map_size];
10034- double * l_max = new double [map_size];
10035- double * l_tot = new double [map_size];
10036- double * l_avg = new double [map_size];
10037- int * n_calls = new int [map_size];
10038- for (int i = 0 ; i < map_size; i++)
10039- {
10040- l_min[i] = 1e10 ;
10041- l_max[i] = 0.0 ;
10042- l_tot[i] = 0.0 ;
10043- l_avg[i] = 0.0 ;
10044- n_calls[i] = 0 ;
10045- }
10046-
10047- /* --- Collect the info for each function from the current rank ---*/
10048-
10049- int func_counter = 0 ;
10050- for (map<string,vector<int > >::iterator it=Profile_Map_tp.begin (); it!=Profile_Map_tp.end (); ++it) {
10051-
10052- for (unsigned int i = 0 ; i < (it->second ).size (); i++) {
10053- n_calls[func_counter]++;
10054- l_tot[func_counter] += Profile_Time_tp[(it->second )[i]];
10055- if (Profile_Time_tp[(it->second )[i]] < l_min[func_counter])
10056- l_min[func_counter] = Profile_Time_tp[(it->second )[i]];
10057- if (Profile_Time_tp[(it->second )[i]] > l_max[func_counter])
10058- l_max[func_counter] = Profile_Time_tp[(it->second )[i]];
10059-
10060- }
10061- l_avg[func_counter] = l_tot[func_counter]/((double )n_calls[func_counter]);
10062- func_counter++;
10063- }
10064-
10065- /* --- Now reduce the data ---*/
10066-
10067- if (rank == MASTER_NODE) {
10068- l_min_red = new double [map_size];
10069- l_max_red = new double [map_size];
10070- l_tot_red = new double [map_size];
10071- l_avg_red = new double [map_size];
10072- n_calls_red = new int [map_size];
10073- }
10074-
10075- #ifdef HAVE_MPI
10076- MPI_Reduce (n_calls, n_calls_red, map_size, MPI_INT, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm ());
10077- MPI_Reduce (l_tot, l_tot_red, map_size, MPI_DOUBLE, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm ());
10078- MPI_Reduce (l_avg, l_avg_red, map_size, MPI_DOUBLE, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm ());
10079- MPI_Reduce (l_min, l_min_red, map_size, MPI_DOUBLE, MPI_MIN, MASTER_NODE, SU2_MPI::GetComm ());
10080- MPI_Reduce (l_max, l_max_red, map_size, MPI_DOUBLE, MPI_MAX, MASTER_NODE, SU2_MPI::GetComm ());
10081- #else
10082- memcpy (n_calls_red, n_calls, map_size*sizeof (int ));
10083- memcpy (l_tot_red, l_tot, map_size*sizeof (double ));
10084- memcpy (l_avg_red, l_avg, map_size*sizeof (double ));
10085- memcpy (l_min_red, l_min, map_size*sizeof (double ));
10086- memcpy (l_max_red, l_max, map_size*sizeof (double ));
10087- #endif
10088-
10089- /* --- The master rank will write the file ---*/
10090-
10091- if (rank == MASTER_NODE) {
10092-
10093- /* --- Take averages over all ranks on the master ---*/
10094-
10095- for (int i = 0 ; i < map_size; i++) {
10096- l_tot_red[i] = l_tot_red[i]/(double )size;
10097- l_avg_red[i] = l_avg_red[i]/(double )size;
10098- n_calls_red[i] = n_calls_red[i]/size;
10099- }
10100-
10101- /* --- Now write a CSV file with the processed results ---*/
10102-
10103- ofstream Profile_File;
10104- Profile_File.precision (15 );
10105- Profile_File.open (" profiling.csv" );
10106-
10107- /* --- Create the CSV header ---*/
10108-
10109- Profile_File << " \" Function_Name\" , \" N_Calls\" , \" Avg_Total_Time\" , \" Avg_Time\" , \" Min_Time\" , \" Max_Time\" , \" Function_ID\" " << endl;
10110-
10111- /* --- Loop through the map and write the results to the file ---*/
10112-
10113- func_counter = 0 ;
10114- for (map<string,vector<int > >::iterator it=Profile_Map_tp.begin (); it!=Profile_Map_tp.end (); ++it) {
10115-
10116- Profile_File << scientific << it->first << " , " << n_calls_red[func_counter] << " , " << l_tot_red[func_counter] << " , " << l_avg_red[func_counter] << " , " << l_min_red[func_counter] << " , " << l_max_red[func_counter] << " , " << (int )Profile_ID_tp[(it->second )[0 ]] << endl;
10117- func_counter++;
10118- }
10119-
10120- Profile_File.close ();
10121-
10122- }
10123-
10124- delete [] l_min;
10125- delete [] l_max;
10126- delete [] l_avg;
10127- delete [] l_tot;
10128- delete [] n_calls;
10129- if (rank == MASTER_NODE) {
10130- delete [] l_min_red;
10131- delete [] l_max_red;
10132- delete [] l_avg_red;
10133- delete [] l_tot_red;
10134- delete [] n_calls_red;
10135- }
10136-
10137- #endif
10138-
10139- }
10140-
101419949void CConfig::GEMM_Tick (double *val_start_time) const {
101429950
101439951#ifdef PROFILE
0 commit comments