@@ -48,6 +48,7 @@ struct OpenCLBenchmarkEnvironment
4848 }
4949 if (printUsage || printHelp) {
5050 fprintf (stderr, " %s" , op.help ().c_str ());
51+ fprintf (stderr, " Pass '--help' to view Google Benchmark options.\n " );
5152 }
5253
5354 std::vector<cl::Platform> platforms;
@@ -360,6 +361,40 @@ BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_1x1_Event)(benchmark::State& s
360361}
361362BENCHMARK_REGISTER_F (Kernel, clEnqueueNDRangeKernel_1x1_Event)->Arg(1 )->Arg(32 )->Arg(512 )->Arg(2048 );
362363
364+ BENCHMARK_DEFINE_F (Kernel, clEnqueueNDRangeKernel_overhead)(benchmark::State& state) // times one blocking 3D launch (enqueue + clFinish) per iteration
365+ {
366+ size_t maxWGS = kernel.getWorkGroupInfo <CL_KERNEL_WORK_GROUP_SIZE>(env.device );
367+ if (maxWGS < 256 ) { // the 8x8x4 local size used below has 256 work-items
368+ state.SkipWithError (" kernel work-group size is too small" );
369+ }
370+
371+ const bool npot = state.range (0 ) == 0 ; // range(0) == 0 selects the non-power-of-two local size
372+ const size_t nwgs = static_cast<size_t>(state.range (1 )); // range(1): number of work-groups along X
373+
374+ const size_t lwx = npot ? 7 : 8 ;
375+ const size_t lwy = npot ? 7 : 8 ;
376+ const size_t lwz = npot ? 5 : 4 ;
377+
378+ const size_t work_dim = 3 ;
379+ const size_t global_work_size[work_dim] = { nwgs * lwx, lwy, lwz }; // one work-group in Y and Z, nwgs in X
380+ const size_t local_work_size[work_dim] = { lwx, lwy, lwz };
381+
382+ for (auto _ : state) {
383+ const cl_int enqueueErr = clEnqueueNDRangeKernel (
384+ queue (),
385+ kernel (),
386+ work_dim,
387+ NULL ,
388+ global_work_size,
389+ local_work_size,
390+ 0 ,
391+ NULL ,
392+ NULL );
393+ if (enqueueErr != CL_SUCCESS || clFinish (queue ()) != CL_SUCCESS) { state.SkipWithError (" kernel launch failed" ); break ; } // never report timings for a failed launch
394+ }
395+ }
396+ BENCHMARK_REGISTER_F (Kernel, clEnqueueNDRangeKernel_overhead)->ArgsProduct({{0 , 1 }, {1 , 1024 , 32 *1024 *1024 }}); // {npot selector} x {work-group count}
397+
363398struct SVMKernel : public benchmark ::Fixture
364399{
365400 cl::Program program;
@@ -480,53 +515,23 @@ struct USMMemFill : public benchmark::Fixture
480515 }
481516};
482517
483- BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemsetINTEL_dptr )(benchmark::State& state)
518+ BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemsetINTEL )(benchmark::State& state) // single benchmark replacing the _dptr/_hptr/_sptr variants; range(0) picks the destination
484519{
485- if (dptr == NULL ) {
486- state.SkipWithError (" unsupported" );
487- }
488- for (auto _ : state) {
489- clEnqueueMemsetINTEL (
490- queue (),
491- dptr,
492- 0 ,
493- sz,
494- 0 ,
495- NULL ,
496- NULL );
497- queue.finish ();
498- }
499- }
500- BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemsetINTEL_dptr);
520+ void * dst = nullptr ; // stays null when range(0) is not a known mem type
501521
502- BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemsetINTEL_hptr)(benchmark::State& state)
503- {
504- if (hptr == NULL ) {
505- state.SkipWithError (" unsupported" );
522+ switch (state.range (0 )) { // 0 = dptr, 1 = hptr, 2 = sptr
523+ case 0 : dst = dptr; break ;
524+ case 1 : dst = hptr; break ;
525+ case 2 : dst = sptr; break ;
526+ default : state.SkipWithError (" unknown mem type" );
506527 }
507- for (auto _ : state) {
508- clEnqueueMemsetINTEL (
509- queue (),
510- hptr,
511- 0 ,
512- sz,
513- 0 ,
514- NULL ,
515- NULL );
516- queue.finish ();
517- }
518- }
519- BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemsetINTEL_hptr);
520-
521- BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemsetINTEL_sptr)(benchmark::State& state)
522- {
523- if (sptr == NULL ) {
524- state.SkipWithError (" unsupported" );
528+ if (dst == nullptr ) { // a null fixture pointer is reported as an unsupported mem type
529+ state.SkipWithError (" unsupported mem type" );
525530 }
526531 for (auto _ : state) {
527532 clEnqueueMemsetINTEL (
528533 queue (),
529- sptr ,
534+ dst , // the pointer selected by the switch above
530535 0 ,
531536 sz,
532537 0 ,
@@ -535,63 +540,27 @@ BENCHMARK_DEFINE_F(USMMemFill, clEnqueueMemsetINTEL_sptr)(benchmark::State& stat
535540 queue.finish ();
536541 }
537542}
538- BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemsetINTEL_sptr );
543+ BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemsetINTEL)->ArgsProduct({{ 0 , 1 , 2 }} ); // one run per mem type index handled by the switch
539544
540- BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemFillINTEL_dptr )(benchmark::State& state)
545+ BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemFillINTEL )(benchmark::State& state) // single benchmark replacing the _dptr/_hptr/_sptr variants; range(0) = pattern size, range(1) = mem type
541546{
542- if (dptr == NULL ) {
543- state.SkipWithError (" unsupported" );
547+ void * dst = nullptr ; // stays null when range(1) is not a known mem type
548+ switch (state.range (1 )) { // 0 = dptr, 1 = hptr, 2 = sptr
549+ case 0 : dst = dptr; break ;
550+ case 1 : dst = hptr; break ;
551+ case 2 : dst = sptr; break ;
552+ default : state.SkipWithError (" unknown mem type" ); break ;
544553 }
545- const cl_ulong pattern = 0 ;
546- const size_t patternSize = state.range (0 );
547- for (auto _ : state) {
548- clEnqueueMemFillINTEL (
549- queue (),
550- dptr,
551- &pattern,
552- patternSize,
553- sz,
554- 0 ,
555- NULL ,
556- NULL );
557- queue.finish ();
554+ if (dst == nullptr ) { // a null fixture pointer is reported as an unsupported mem type
555+ state.SkipWithError (" unsupported mem type" );
558556 }
559- }
560- BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemFillINTEL_dptr)->Arg(1 )->Arg(4 )->Arg(8 );
561557
562- BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemFillINTEL_hptr)(benchmark::State& state)
563- {
564- if (hptr == NULL ) {
565- state.SkipWithError (" unsupported" );
566- }
567558 const cl_ulong pattern = 0 ;
568559 const size_t patternSize = state.range (0 );
569560 for (auto _ : state) {
570561 clEnqueueMemFillINTEL (
571562 queue (),
572- hptr,
573- &pattern,
574- patternSize,
575- sz,
576- 0 ,
577- NULL ,
578- NULL );
579- queue.finish ();
580- }
581- }
582- BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemFillINTEL_hptr)->Arg(1 )->Arg(4 )->Arg(8 );
583-
584- BENCHMARK_DEFINE_F (USMMemFill, clEnqueueMemFillINTEL_sptr)(benchmark::State& state)
585- {
586- if (sptr == NULL ) {
587- state.SkipWithError (" unsupported" );
588- }
589- const cl_ulong pattern = 0 ;
590- const size_t patternSize = state.range (0 );
591- for (auto _ : state) {
592- clEnqueueMemFillINTEL (
593- queue (),
594- sptr,
563+ dst, // fill the pointer selected by range(1); passing dptr here would make every mem type run measure device memory
595564 &pattern,
596565 patternSize,
597566 sz,
@@ -601,7 +570,7 @@ BENCHMARK_DEFINE_F(USMMemFill, clEnqueueMemFillINTEL_sptr)(benchmark::State& sta
601570 queue.finish ();
602571 }
603572}
604- BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemFillINTEL_sptr )->Arg( 1 )->Arg( 4 )->Arg( 8 );
573+ BENCHMARK_REGISTER_F (USMMemFill, clEnqueueMemFillINTEL )->ArgsProduct({{ 1 , 4 , 8 }, { 0 , 1 , 2 }} ); // pattern sizes are capped at 8: the pattern is one cl_ulong, so a 16-byte size would read past it
605574
606575int main (int argc, char ** argv)
607576{
0 commit comments