Skip to content

Commit e3b8048

Browse files
committed
add clCreateBuffer benchmarks
1 parent 04f1473 commit e3b8048

1 file changed

Lines changed: 95 additions & 9 deletions

File tree

  • samples/benchmarks/00_apibenchmark

samples/benchmarks/00_apibenchmark/main.cpp

Lines changed: 95 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ struct Platform : public benchmark::Fixture
9393

9494
BENCHMARK_DEFINE_F(Platform, clGetDeviceIDs)(benchmark::State& state)
9595
{
96-
while(state.KeepRunning()) {
96+
for(auto _ : state) {
9797
cl_uint numDevices = 0;
9898
clGetDeviceIDs(
9999
platform(),
@@ -119,7 +119,7 @@ struct Device : public benchmark::Fixture
119119

120120
BENCHMARK_DEFINE_F(Device, clGetDeviceInfo)(benchmark::State& state)
121121
{
122-
while(state.KeepRunning()) {
122+
for(auto _ : state) {
123123
cl_device_type type = 0;
124124
clGetDeviceInfo(
125125
device(),
@@ -131,6 +131,92 @@ BENCHMARK_DEFINE_F(Device, clGetDeviceInfo)(benchmark::State& state)
131131
}
132132
BENCHMARK_REGISTER_F(Device, clGetDeviceInfo);
133133

134+
struct Context : public benchmark::Fixture
135+
{
136+
cl::Context context;
137+
138+
virtual void SetUp(benchmark::State& state) override {
139+
context = env.context;
140+
}
141+
virtual void TearDown(benchmark::State& state) override {
142+
context = NULL;
143+
}
144+
};
145+
146+
// Measures clCreateBuffer with CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR.
//
// state.range(0) supplies the buffer size in bytes. Created buffers are kept
// in a fixed-size pool and released in batches, so at most `maxNumBuffers`
// buffers are alive at any time. Note that the batched releases happen inside
// the timed loop.
BENCHMARK_DEFINE_F(Context, clCreateBuffer)(benchmark::State& state)
{
    const size_t maxNumBuffers = 128;

    const size_t bufferSize = state.range(0);
    std::vector<cl_uchar> hostData(bufferSize, 0);

    std::array<cl_mem, maxNumBuffers> pool;
    size_t live = 0;

    // Releases every buffer currently held in the pool and empties it.
    auto releaseLive = [&]() {
        for (size_t idx = 0; idx < live; idx++) {
            clReleaseMemObject(pool[idx]);
        }
        live = 0;
    };

    for (auto _ : state) {
        pool[live] = clCreateBuffer(
            context(),
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
            hostData.size(),
            hostData.data(),
            nullptr);
        live++;

        // The pool is full: release the whole batch before continuing.
        if (live >= maxNumBuffers) {
            releaseLive();
        }
    }

    // Release whatever is left over from the final, partially filled batch.
    releaseLive();
}
BENCHMARK_REGISTER_F(Context, clCreateBuffer)->Arg(64);
176+
177+
// Measures clCreateBuffer when CL_MEM_FORCE_HOST_MEMORY_INTEL is requested.
//
// state.range(0) supplies the buffer size in bytes. The same flag set is used
// for the up-front probe and for the timed loop, so the benchmark is skipped
// unless the exact call being measured succeeds. Previously the timed loop
// omitted CL_MEM_FORCE_HOST_MEMORY_INTEL and therefore measured the same thing
// as the plain clCreateBuffer benchmark.
//
// Created buffers are kept in a fixed-size pool and released in batches, so at
// most `maxNumBuffers` buffers are alive at any time. Note that the batched
// releases happen inside the timed loop.
BENCHMARK_DEFINE_F(Context, clCreateBuffer_ForceHostMem)(benchmark::State& state)
{
    const size_t bufferSize = state.range(0);
    std::vector<cl_uchar> data(bufferSize, 0);

    const size_t maxNumBuffers = 128;
    std::array<cl_mem, maxNumBuffers> buffers;

    // Flags shared by the probe and the measured calls.
    const cl_mem_flags flags =
        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_FORCE_HOST_MEMORY_INTEL;

    // Probe once outside the timed region; skip the benchmark if the
    // implementation cannot create a buffer with these flags.
    cl_mem test = clCreateBuffer(
        context(),
        flags,
        data.size(),
        data.data(),
        nullptr);
    if (test) {
        clReleaseMemObject(test);
    } else {
        state.SkipWithError("Couldn't create buffer with CL_MEM_FORCE_HOST_MEMORY_INTEL");
        return;
    }

    size_t count = 0;
    for (auto _ : state) {
        buffers[count++] = clCreateBuffer(
            context(),
            flags,
            data.size(),
            data.data(),
            nullptr);

        // The pool is full: release the whole batch before continuing.
        if (count >= maxNumBuffers) {
            for (size_t i = 0; i < count; i++) {
                clReleaseMemObject(buffers[i]);
            }
            count = 0;
        }
    }

    // Release whatever is left over from the final, partially filled batch.
    for (size_t i = 0; i < count; i++) {
        clReleaseMemObject(buffers[i]);
    }
}
BENCHMARK_REGISTER_F(Context, clCreateBuffer_ForceHostMem)->Arg(64);
219+
134220
struct Kernel : public benchmark::Fixture
135221
{
136222
cl::CommandQueue queue;
@@ -157,7 +243,7 @@ struct Kernel : public benchmark::Fixture
157243

158244
BENCHMARK_DEFINE_F(Kernel, clSetKernelArg)(benchmark::State& state)
159245
{
160-
while(state.KeepRunning()) {
246+
for(auto _ : state) {
161247
int x = 0;
162248
clSetKernelArg(
163249
kernel(),
@@ -174,7 +260,7 @@ BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_NullQueueError)(benchmark::Sta
174260
const size_t global_work_size[work_dim] = { 1 };
175261
const size_t local_work_size[work_dim] = { 1 };
176262
const size_t global_work_offset[work_dim] = { 0 };
177-
while(state.KeepRunning()) {
263+
for(auto _ : state) {
178264
clEnqueueNDRangeKernel(
179265
NULL,
180266
kernel(),
@@ -195,7 +281,7 @@ BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_NullKernelError)(benchmark::St
195281
const size_t global_work_size[work_dim] = { 1 };
196282
const size_t local_work_size[work_dim] = { 1 };
197283
const size_t global_work_offset[work_dim] = { 0 };
198-
while(state.KeepRunning()) {
284+
for(auto _ : state) {
199285
clEnqueueNDRangeKernel(
200286
queue(),
201287
NULL,
@@ -221,7 +307,7 @@ BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_1x1_NoEvent)(benchmark::State&
221307

222308
size_t count = 0;
223309

224-
while(state.KeepRunning()) {
310+
for(auto _ : state) {
225311
clEnqueueNDRangeKernel(
226312
queue(),
227313
kernel(),
@@ -252,7 +338,7 @@ BENCHMARK_DEFINE_F(Kernel, clEnqueueNDRangeKernel_1x1_Event)(benchmark::State& s
252338

253339
size_t count = 0;
254340

255-
while(state.KeepRunning()) {
341+
for(auto _ : state) {
256342
cl_event event = NULL;
257343
clEnqueueNDRangeKernel(
258344
queue(),
@@ -307,7 +393,7 @@ BENCHMARK_DEFINE_F(SVMKernel, clSetKernelArgSVMPointer)(benchmark::State& state)
307393
{
308394
const int mask = (int)state.range(0) - 1;
309395
int i = 0;
310-
while(state.KeepRunning()) {
396+
for(auto _ : state) {
311397
clSetKernelArgSVMPointer(
312398
kernel(),
313399
0,
@@ -356,7 +442,7 @@ BENCHMARK_DEFINE_F(USMMemCpy, clEnqueueMemcpyINTEL_device_blocking)(benchmark::S
356442
if (dptrs[0] == NULL || dptrs[1] == NULL) {
357443
state.SkipWithError("unsupported");
358444
}
359-
while(state.KeepRunning()) {
445+
for(auto _ : state) {
360446
clEnqueueMemcpyINTEL(
361447
queue(),
362448
CL_TRUE,

0 commit comments

Comments
 (0)