Skip to content

Commit 536a639

Browse files
authored
Make the Tracy integration functional (#2785)
* fix tracy integration and start making it do something useful * annotate drivers * annotate integration classes * annotate iteration classes * annotate solvers * some solver inlines * annotate linear algebra * cleanup unused profiling stuff * fix
1 parent c5963e3 commit 536a639

69 files changed

Lines changed: 954 additions & 258 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Common/include/CConfig.hpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "option_structure.hpp"
4747
#include "containers/container_decorators.hpp"
4848
#include "toolboxes/printing_toolbox.hpp"
49+
#include "tracy_structure.hpp"
4950

5051
#ifdef HAVE_CGNS
5152
#include "cgnslib.h"
@@ -9097,25 +9098,6 @@ class CConfig {
90979098
*/
90989099
unsigned long GetNonphysical_Reconstr(void) const { return Nonphys_Reconstr; }
90999100

9100-
/*!
9101-
* \brief Start the timer for profiling subroutines.
9102-
* \param[in] val_start_time - the value of the start time.
9103-
*/
9104-
void Tick(double *val_start_time);
9105-
9106-
/*!
9107-
* \brief Stop the timer for profiling subroutines and store results.
9108-
* \param[in] val_start_time - the value of the start time.
9109-
* \param[in] val_function_name - string for the name of the profiled subroutine.
9110-
* \param[in] val_group_id - integer ID of the group the profiled subroutine belongs to.
9111-
*/
9112-
void Tock(double val_start_time, const string& val_function_name, int val_group_id);
9113-
9114-
/*!
9115-
* \brief Write a CSV file containing the results of the profiling.
9116-
*/
9117-
void SetProfilingCSV(void);
9118-
91199101
/*!
91209102
* \brief Start the timer for profiling subroutines.
91219103
* \param[in] val_start_time - the value of the start time.

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ class CSysSolve {
246246
*/
247247
template <class OtherType>
248248
void HandleTemporariesIn(const CSysVector<OtherType>& LinSysRes, CSysVector<OtherType>& LinSysSol) {
249+
SU2_ZONE_SCOPED
249250
if constexpr (std::is_same_v<ScalarType, OtherType>) {
250251
/*--- Same type specialization, temporary variables are not required. ---*/
251252
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {
@@ -273,6 +274,7 @@ class CSysSolve {
273274
*/
274275
template <class OtherType>
275276
void HandleTemporariesOut(CSysVector<OtherType>& LinSysSol) {
277+
SU2_ZONE_SCOPED
276278
if constexpr (std::is_same_v<ScalarType, OtherType>) {
277279
/*--- Same type specialization, temporary variables are not required. ---*/
278280
BEGIN_SU2_OMP_SAFE_GLOBAL_ACCESS {

Common/include/tracy_structure.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
* they can be completely "disabled" when compiling without tracy.
77
* \note Do not include tracy headers explicitly anywhere, use this header instead.
88
* \note To enable tracy, define the TRACY_ENABLE macro during compilation.
9-
* \author Divyaprakash
109
* \version 8.4.0 "Harrier"
1110
*
1211
* SU2 Project Website: https://su2code.github.io
@@ -32,11 +31,16 @@
3231

3332
#pragma once
3433

35-
#ifdef HAVE_TRACY
34+
#ifdef TRACY_ENABLE
3635
#include "tracy/Tracy.hpp"
37-
#define SU2_ZONE_SCOPED ZoneScoped
38-
#define SU2_ZONE_SCOPED_N(name) ZoneScopedN(name)
36+
#define SU2_ZONE_SCOPED ZoneScoped;
37+
#define SU2_ZONE_SCOPED_N(name) ZoneScopedN(name);
3938
#else
4039
#define SU2_ZONE_SCOPED
4140
#define SU2_ZONE_SCOPED_N(name)
4241
#endif
42+
43+
#define BEGIN_SU2_ZONE_N(name) \
44+
{ \
45+
SU2_ZONE_SCOPED_N(name)
46+
#define END_SU2_ZONE }

Common/src/CConfig.cpp

Lines changed: 0 additions & 192 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ using namespace PrintingToolbox;
4646
#endif
4747
#endif
4848

49-
vector<string> Profile_Function_tp; /*!< \brief Vector of string names for profiled functions. */
50-
vector<double> Profile_Time_tp; /*!< \brief Vector of elapsed time for profiled functions. */
51-
vector<double> Profile_ID_tp; /*!< \brief Vector of group ID number for profiled functions. */
52-
map<string, vector<int> > Profile_Map_tp; /*!< \brief Map containing the final results for profiled functions. */
53-
5449
map<CLong3T, int> GEMM_Profile_MNK; /*!< \brief Map, which maps the GEMM size to the index where
5550
the data for this GEMM is stored in several vectors. */
5651
vector<long> GEMM_Profile_NCalls; /*!< \brief Vector, which stores the number of calls to this
@@ -59,8 +54,6 @@ vector<double> GEMM_Profile_TotTime; /*!< \brief Total time spent for this
5954
vector<double> GEMM_Profile_MinTime; /*!< \brief Minimum time spent for this GEMM size. */
6055
vector<double> GEMM_Profile_MaxTime; /*!< \brief Maximum time spent for this GEMM size. */
6156

62-
//#pragma omp threadprivate(Profile_Function_tp, Profile_Time_tp, Profile_ID_tp, Profile_Map_tp)
63-
6457

6558
CConfig::CConfig(char case_filename[MAX_STRING_SIZE], SU2_COMPONENT val_software, bool verb_high) {
6659

@@ -9953,191 +9946,6 @@ short CConfig::FindInterfaceMarker(unsigned short iInterface) const {
99539946
return -1;
99549947
}
99559948

9956-
void CConfig::Tick(double *val_start_time) {
9957-
9958-
#ifdef PROFILE
9959-
*val_start_time = SU2_MPI::Wtime();
9960-
#endif
9961-
9962-
}
9963-
9964-
void CConfig::Tock(double val_start_time, const string& val_function_name, int val_group_id) {
9965-
9966-
#ifdef PROFILE
9967-
9968-
double val_stop_time = 0.0, val_elapsed_time = 0.0;
9969-
9970-
val_stop_time = SU2_MPI::Wtime();
9971-
9972-
/*--- Compute the elapsed time for this subroutine ---*/
9973-
val_elapsed_time = val_stop_time - val_start_time;
9974-
9975-
/*--- Store the subroutine name and the elapsed time ---*/
9976-
Profile_Function_tp.push_back(val_function_name);
9977-
Profile_Time_tp.push_back(val_elapsed_time);
9978-
Profile_ID_tp.push_back(val_group_id);
9979-
9980-
#endif
9981-
9982-
}
9983-
9984-
void CConfig::SetProfilingCSV() {
9985-
9986-
#ifdef PROFILE
9987-
9988-
int rank = MASTER_NODE;
9989-
int size = SINGLE_NODE;
9990-
#ifdef HAVE_MPI
9991-
SU2_MPI::Comm_rank(SU2_MPI::GetComm(), &rank);
9992-
SU2_MPI::Comm_size(SU2_MPI::GetComm(), &size);
9993-
#endif
9994-
9995-
/*--- Each rank has the same stack trace, so the they have the same
9996-
function calls and ordering in the vectors. We're going to reduce
9997-
the timings from each rank and extract the avg, min, and max timings. ---*/
9998-
9999-
/*--- First, create a local mapping, so that we can extract the
10000-
min and max values for each function. ---*/
10001-
10002-
for (unsigned int i = 0; i < Profile_Function_tp.size(); i++) {
10003-
10004-
/*--- Add the function and initialize if not already stored (the ID
10005-
only needs to be stored the first time).---*/
10006-
if (Profile_Map_tp.find(Profile_Function_tp[i]) == Profile_Map_tp.end()) {
10007-
10008-
vector<int> profile; profile.push_back(i);
10009-
Profile_Map_tp.insert(pair<string,vector<int> >(Profile_Function_tp[i],profile));
10010-
10011-
} else {
10012-
10013-
/*--- This function has already been added, so simply increment the
10014-
number of calls and total time for this function. ---*/
10015-
10016-
Profile_Map_tp[Profile_Function_tp[i]].push_back(i);
10017-
10018-
}
10019-
}
10020-
10021-
/*--- We now have everything gathered by function name, so we can loop over
10022-
each function and store the min/max times. ---*/
10023-
10024-
int map_size = 0;
10025-
for (map<string,vector<int> >::iterator it=Profile_Map_tp.begin(); it!=Profile_Map_tp.end(); ++it) {
10026-
map_size++;
10027-
}
10028-
10029-
/*--- Allocate and initialize memory ---*/
10030-
10031-
double *l_min_red = nullptr, *l_max_red = nullptr, *l_tot_red = nullptr, *l_avg_red = nullptr;
10032-
int *n_calls_red = nullptr;
10033-
double* l_min = new double[map_size];
10034-
double* l_max = new double[map_size];
10035-
double* l_tot = new double[map_size];
10036-
double* l_avg = new double[map_size];
10037-
int* n_calls = new int[map_size];
10038-
for (int i = 0; i < map_size; i++)
10039-
{
10040-
l_min[i] = 1e10;
10041-
l_max[i] = 0.0;
10042-
l_tot[i] = 0.0;
10043-
l_avg[i] = 0.0;
10044-
n_calls[i] = 0;
10045-
}
10046-
10047-
/*--- Collect the info for each function from the current rank ---*/
10048-
10049-
int func_counter = 0;
10050-
for (map<string,vector<int> >::iterator it=Profile_Map_tp.begin(); it!=Profile_Map_tp.end(); ++it) {
10051-
10052-
for (unsigned int i = 0; i < (it->second).size(); i++) {
10053-
n_calls[func_counter]++;
10054-
l_tot[func_counter] += Profile_Time_tp[(it->second)[i]];
10055-
if (Profile_Time_tp[(it->second)[i]] < l_min[func_counter])
10056-
l_min[func_counter] = Profile_Time_tp[(it->second)[i]];
10057-
if (Profile_Time_tp[(it->second)[i]] > l_max[func_counter])
10058-
l_max[func_counter] = Profile_Time_tp[(it->second)[i]];
10059-
10060-
}
10061-
l_avg[func_counter] = l_tot[func_counter]/((double)n_calls[func_counter]);
10062-
func_counter++;
10063-
}
10064-
10065-
/*--- Now reduce the data ---*/
10066-
10067-
if (rank == MASTER_NODE) {
10068-
l_min_red = new double[map_size];
10069-
l_max_red = new double[map_size];
10070-
l_tot_red = new double[map_size];
10071-
l_avg_red = new double[map_size];
10072-
n_calls_red = new int[map_size];
10073-
}
10074-
10075-
#ifdef HAVE_MPI
10076-
MPI_Reduce(n_calls, n_calls_red, map_size, MPI_INT, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
10077-
MPI_Reduce(l_tot, l_tot_red, map_size, MPI_DOUBLE, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
10078-
MPI_Reduce(l_avg, l_avg_red, map_size, MPI_DOUBLE, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
10079-
MPI_Reduce(l_min, l_min_red, map_size, MPI_DOUBLE, MPI_MIN, MASTER_NODE, SU2_MPI::GetComm());
10080-
MPI_Reduce(l_max, l_max_red, map_size, MPI_DOUBLE, MPI_MAX, MASTER_NODE, SU2_MPI::GetComm());
10081-
#else
10082-
memcpy(n_calls_red, n_calls, map_size*sizeof(int));
10083-
memcpy(l_tot_red, l_tot, map_size*sizeof(double));
10084-
memcpy(l_avg_red, l_avg, map_size*sizeof(double));
10085-
memcpy(l_min_red, l_min, map_size*sizeof(double));
10086-
memcpy(l_max_red, l_max, map_size*sizeof(double));
10087-
#endif
10088-
10089-
/*--- The master rank will write the file ---*/
10090-
10091-
if (rank == MASTER_NODE) {
10092-
10093-
/*--- Take averages over all ranks on the master ---*/
10094-
10095-
for (int i = 0; i < map_size; i++) {
10096-
l_tot_red[i] = l_tot_red[i]/(double)size;
10097-
l_avg_red[i] = l_avg_red[i]/(double)size;
10098-
n_calls_red[i] = n_calls_red[i]/size;
10099-
}
10100-
10101-
/*--- Now write a CSV file with the processed results ---*/
10102-
10103-
ofstream Profile_File;
10104-
Profile_File.precision(15);
10105-
Profile_File.open("profiling.csv");
10106-
10107-
/*--- Create the CSV header ---*/
10108-
10109-
Profile_File << "\"Function_Name\", \"N_Calls\", \"Avg_Total_Time\", \"Avg_Time\", \"Min_Time\", \"Max_Time\", \"Function_ID\"" << endl;
10110-
10111-
/*--- Loop through the map and write the results to the file ---*/
10112-
10113-
func_counter = 0;
10114-
for (map<string,vector<int> >::iterator it=Profile_Map_tp.begin(); it!=Profile_Map_tp.end(); ++it) {
10115-
10116-
Profile_File << scientific << it->first << ", " << n_calls_red[func_counter] << ", " << l_tot_red[func_counter] << ", " << l_avg_red[func_counter] << ", " << l_min_red[func_counter] << ", " << l_max_red[func_counter] << ", " << (int)Profile_ID_tp[(it->second)[0]] << endl;
10117-
func_counter++;
10118-
}
10119-
10120-
Profile_File.close();
10121-
10122-
}
10123-
10124-
delete [] l_min;
10125-
delete [] l_max;
10126-
delete [] l_avg;
10127-
delete [] l_tot;
10128-
delete [] n_calls;
10129-
if (rank == MASTER_NODE) {
10130-
delete [] l_min_red;
10131-
delete [] l_max_red;
10132-
delete [] l_avg_red;
10133-
delete [] l_tot_red;
10134-
delete [] n_calls_red;
10135-
}
10136-
10137-
#endif
10138-
10139-
}
10140-
101419949
void CConfig::GEMM_Tick(double *val_start_time) const {
101429950

101439951
#ifdef PROFILE

0 commit comments

Comments
 (0)