Skip to content

Commit 8716a9d

Browse files
committed
Add timing functionality to synthesis tools and improve performance tracking
- Integrated `fast_io::timer` to measure and report execution time for key operations in `verilog2penl.cpp` and `verilog2plsav.cpp`.
- Updated command-line options to include a `--time` flag (also accepted as `--timing`) that enables the timing reports.
- Enhanced the compilation, synthesis, conversion, layout, and save steps to track execution time only when the flag is set, improving performance insight during synthesis.
- Used `std::move` in `pe_synth_cuda_u64_cones.cu` for efficient resource management when pushing device structures into the device list.
1 parent 5ecf1a0 commit 8716a9d

3 files changed

Lines changed: 68 additions & 8 deletions

File tree

src/pe_synth_cuda_u64_cones.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <deque>
99
#include <mutex>
1010
#include <new>
11+
#include <utility>
1112
#include <vector>
1213

1314
#include <cuda_runtime.h>
@@ -1111,7 +1112,7 @@ namespace phy_engine::verilog::digital::details
11111112
d.begin = begin;
11121113
d.end = end;
11131114
d.stride_words = stride_words;
1114-
h->devs.push_back(d);
1115+
h->devs.push_back(std::move(d));
11151116
}
11161117

11171118
bool ok = true;

src/verilog2penl.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
#include <phy_engine/pe_nl_fileformat/pe_nl_fileformat.h>
1212

13+
#include <fast_io/fast_io_driver/timer.h>
14+
1315
#include <cstddef>
1416
#include <cstdint>
1517
#include <cstdlib>
@@ -91,6 +93,7 @@ namespace
9193
" --cuda-min-batch N Minimum cone batch size before offloading (default: 1024)\n"
9294
" --cuda-expand-windows (Optional) Under -Ocuda: increase some bounded windows (resub/sweep) for quality at higher runtime\n"
9395
" --cuda-trace Collect per-pass CUDA telemetry (printed in --report)\n"
96+
" --time Print per-step wall time using fast_io::timer\n"
9497
" --opt-cost gate|weighted Omax: objective cost model (default: gate)\n"
9598
" --opt-weight-NOT N Omax: weighted cost (default: 1)\n"
9699
" --opt-weight-AND N Omax: weighted cost (default: 1)\n"
@@ -421,6 +424,7 @@ int main(int argc, char** argv)
421424
bool const cuda_opt = has_flag(argc, argv, "--cuda-opt") || ocuda;
422425
bool const cuda_expand_windows = has_flag(argc, argv, "--cuda-expand-windows");
423426
bool const cuda_trace = has_flag(argc, argv, "--cuda-trace");
427+
bool const step_time = has_flag(argc, argv, "--time") || has_flag(argc, argv, "--timing");
424428
std::uint32_t cuda_device_mask = 0;
425429
if(auto s = arg_after(argc, argv, "--cuda-device-mask"))
426430
{
@@ -584,7 +588,12 @@ int main(int argc, char** argv)
584588

585589
auto const in_path_s = in_path.string();
586590
::fast_io::io::perr(::fast_io::err(), "[verilog2penl] compile ", ::fast_io::mnp::os_c_str(in_path_s.c_str()), "\n");
587-
auto cr = ::phy_engine::verilog::digital::compile(src, copt);
591+
auto cr = [&]()
592+
{
593+
if(!step_time) { return ::phy_engine::verilog::digital::compile(src, copt); }
594+
::fast_io::timer t{u8"[verilog2penl] time.compile"};
595+
return ::phy_engine::verilog::digital::compile(src, copt);
596+
}();
588597
if(!cr.errors.empty())
589598
{
590599
diagnostic_options dop{};
@@ -727,7 +736,17 @@ int main(int argc, char** argv)
727736
opt.report = show_report ? __builtin_addressof(rep) : nullptr;
728737
729738
::fast_io::io::perr(::fast_io::err(), "[verilog2penl] synthesize_to_pe_netlist\n");
730-
if(!::phy_engine::verilog::digital::synthesize_to_pe_netlist(nl, top_inst, port_nodes, &err, opt))
739+
bool synth_ok = false;
740+
if(step_time)
741+
{
742+
::fast_io::timer t{u8"[verilog2penl] time.synthesize_to_pe_netlist"};
743+
synth_ok = ::phy_engine::verilog::digital::synthesize_to_pe_netlist(nl, top_inst, port_nodes, &err, opt);
744+
}
745+
else
746+
{
747+
synth_ok = ::phy_engine::verilog::digital::synthesize_to_pe_netlist(nl, top_inst, port_nodes, &err, opt);
748+
}
749+
if(!synth_ok)
731750
{
732751
::fast_io::io::perr(::fast_io::u8err(), u8"error: synthesize_to_pe_netlist failed: ", u8sv{err.message.data(), err.message.size()}, u8"\n");
733752
return 14;
@@ -818,7 +837,12 @@ int main(int argc, char** argv)
818837
sopt.layout = *layout;
819838
820839
::fast_io::io::perr(::fast_io::err(), "[verilog2penl] save\n");
821-
auto st = ::phy_engine::pe_nl_fileformat::save(out_path, c, sopt);
840+
auto st = [&]()
841+
{
842+
if(!step_time) { return ::phy_engine::pe_nl_fileformat::save(out_path, c, sopt); }
843+
::fast_io::timer t{u8"[verilog2penl] time.save"};
844+
return ::phy_engine::pe_nl_fileformat::save(out_path, c, sopt);
845+
}();
822846
if(!st)
823847
{
824848
::fast_io::io::perr(::fast_io::err(), "error: save failed: ", ::fast_io::mnp::os_c_str(st.message.c_str()), "\n");

src/verilog2plsav.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include <phy_engine/phy_lab_wrapper/pe_to_pl.h>
1010
#include <phy_engine/phy_lab_wrapper/auto_layout/auto_layout.h>
1111

12+
#include <fast_io/fast_io_driver/timer.h>
13+
1214
#include <cmath>
1315
#include <cstddef>
1416
#include <cstdint>
@@ -350,6 +352,7 @@ static void usage(char const* argv0)
350352
" --cuda-min-batch N Minimum cone batch size before offloading (default: 1024)\n"
351353
" --cuda-expand-windows (Optional) Under -Ocuda: increase some bounded windows (resub/sweep/decomp) for quality at higher runtime\n"
352354
" --cuda-trace Collect per-pass CUDA telemetry (printed in --report)\n"
355+
" --time Print per-step wall time using fast_io::timer\n"
353356
" --opt-cost gate|weighted Omax: objective cost model (default: gate)\n"
354357
" --opt-weight-NOT N Omax: weighted cost (default: 1)\n"
355358
" --opt-weight-AND N Omax: weighted cost (default: 1)\n"
@@ -703,6 +706,7 @@ int main(int argc, char** argv)
703706
bool const cuda_opt = has_flag(argc, argv, "--cuda-opt") || ocuda;
704707
bool const cuda_expand_windows = has_flag(argc, argv, "--cuda-expand-windows");
705708
bool const cuda_trace = has_flag(argc, argv, "--cuda-trace");
709+
bool const step_time = has_flag(argc, argv, "--time") || has_flag(argc, argv, "--timing");
706710
std::uint32_t cuda_device_mask = 0;
707711
if(auto s = arg_after(argc, argv, "--cuda-device-mask"))
708712
{
@@ -923,7 +927,12 @@ int main(int argc, char** argv)
923927
924928
auto const in_path_s = in_path.string();
925929
::fast_io::io::perr(::fast_io::err(), "[verilog2plsav] compile ", ::fast_io::mnp::os_c_str(in_path_s.c_str()), "\n");
926-
auto cr = ::phy_engine::verilog::digital::compile(src, copt);
930+
auto cr = [&]()
931+
{
932+
if(!step_time) { return ::phy_engine::verilog::digital::compile(src, copt); }
933+
::fast_io::timer t{u8"[verilog2plsav] time.compile"};
934+
return ::phy_engine::verilog::digital::compile(src, copt);
935+
}();
927936
if(!cr.errors.empty())
928937
{
929938
diagnostic_options dop{};
@@ -1069,7 +1078,17 @@ int main(int argc, char** argv)
10691078
opt.report = show_report ? __builtin_addressof(rep) : nullptr;
10701079
10711080
::fast_io::io::perr(::fast_io::err(), "[verilog2plsav] synthesize_to_pe_netlist\n");
1072-
if(!::phy_engine::verilog::digital::synthesize_to_pe_netlist(nl, top_inst, ports, &err, opt))
1081+
bool synth_ok = false;
1082+
if(step_time)
1083+
{
1084+
::fast_io::timer t{u8"[verilog2plsav] time.synthesize_to_pe_netlist"};
1085+
synth_ok = ::phy_engine::verilog::digital::synthesize_to_pe_netlist(nl, top_inst, ports, &err, opt);
1086+
}
1087+
else
1088+
{
1089+
synth_ok = ::phy_engine::verilog::digital::synthesize_to_pe_netlist(nl, top_inst, ports, &err, opt);
1090+
}
1091+
if(!synth_ok)
10731092
{
10741093
::fast_io::io::perr(::fast_io::u8err(),
10751094
u8"error: synthesize_to_pe_netlist failed: ",
@@ -1185,7 +1204,12 @@ int main(int argc, char** argv)
11851204
};
11861205
11871206
::fast_io::io::perr(::fast_io::err(), "[verilog2plsav] pe_to_pl convert\n");
1188-
auto r = ::phy_engine::phy_lab_wrapper::pe_to_pl::convert(nl, popt);
1207+
auto r = [&]()
1208+
{
1209+
if(!step_time) { return ::phy_engine::phy_lab_wrapper::pe_to_pl::convert(nl, popt); }
1210+
::fast_io::timer t{u8"[verilog2plsav] time.pe_to_pl.convert"};
1211+
return ::phy_engine::phy_lab_wrapper::pe_to_pl::convert(nl, popt);
1212+
}();
11891213
11901214
// Keep IO elements fixed for layout.
11911215
for(auto const& e : r.ex.elements())
@@ -1198,6 +1222,9 @@ int main(int argc, char** argv)
11981222
11991223
// Auto-layout internal elements into the requested region.
12001224
{
1225+
::std::optional<::fast_io::timer> t{};
1226+
if(step_time) { t.emplace(u8"[verilog2plsav] time.auto_layout"); }
1227+
12011228
::phy_engine::phy_lab_wrapper::auto_layout::options aopt{};
12021229
aopt.layout_mode = *layout_mode;
12031230
aopt.respect_fixed_elements = true;
@@ -1324,7 +1351,15 @@ int main(int argc, char** argv)
13241351
"\n");
13251352
}
13261353
1327-
r.ex.save(out_path, 2);
1354+
if(step_time)
1355+
{
1356+
::fast_io::timer t{u8"[verilog2plsav] time.save"};
1357+
r.ex.save(out_path, 2);
1358+
}
1359+
else
1360+
{
1361+
r.ex.save(out_path, 2);
1362+
}
13281363
13291364
if(!std::filesystem::exists(out_path))
13301365
{

0 commit comments

Comments
 (0)