From f762abfe186569b3f702e22d79e03513800d7287 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh
Date: Tue, 10 Mar 2026 11:20:51 -0700
Subject: [PATCH 1/2] add a tester for profiling event profiling queries

---
 samples/99_profbench/CMakeLists.txt |  10 +++
 samples/99_profbench/main.cpp       | 133 ++++++++++++++++++++++++++++
 samples/CMakeLists.txt              |   2 +
 3 files changed, 145 insertions(+)
 create mode 100644 samples/99_profbench/CMakeLists.txt
 create mode 100644 samples/99_profbench/main.cpp

diff --git a/samples/99_profbench/CMakeLists.txt b/samples/99_profbench/CMakeLists.txt
new file mode 100644
index 00000000..c8cb22d3
--- /dev/null
+++ b/samples/99_profbench/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright (c) 2026 Ben Ashbaugh
+#
+# SPDX-License-Identifier: MIT
+
+add_opencl_sample(
+    TEST
+    NUMBER 99
+    TARGET profbench
+    VERSION 120
+    SOURCES main.cpp)
diff --git a/samples/99_profbench/main.cpp b/samples/99_profbench/main.cpp
new file mode 100644
index 00000000..aabdd639
--- /dev/null
+++ b/samples/99_profbench/main.cpp
@@ -0,0 +1,133 @@
+/*
+// Copyright (c) 2026 Ben Ashbaugh
+//
+// SPDX-License-Identifier: MIT
+*/
+
+#include <popl/popl.hpp>
+#include <CL/opencl.hpp>
+#include <chrono>
+#include "util.hpp"
+
+static const char kernelString[] = R"CLC(
+kernel void inc_buffer(global int* dst)
+{
+    atomic_inc(dst);
+}
+)CLC";
+
+int main(
+    int argc,
+    char** argv )
+{
+    int platformIndex = 0;
+    int deviceIndex = 0;
+
+    size_t numEvents = 1024 * 1024;
+
+    {
+        popl::OptionParser op("Supported Options");
+        op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
+        op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
+        op.add<popl::Value<size_t>>("n", "numevents", "Number of Events", numEvents, &numEvents);
+
+        bool printUsage = false;
+        try {
+            op.parse(argc, argv);
+        } catch (std::exception& e) {
+            fprintf(stderr, "Error: %s\n\n", e.what());
+            printUsage = true;
+        }
+
+        if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
+            fprintf(stderr,
+                "Usage: profbench [options]\n"
+                "%s", op.help().c_str());
+            return -1;
+        }
+    }
+
+    std::vector<cl::Platform> platforms;
+    cl::Platform::get(&platforms);
+
+    if (!checkPlatformIndex(platforms, platformIndex)) {
+        return -1;
+    }
+
+    printf("Running on platform: %s\n",
+        platforms[platformIndex].getInfo<CL_PLATFORM_NAME>().c_str() );
+
+    std::vector<cl::Device> devices;
+    platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+
+    printf("Running on device: %s\n",
+        devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str() );
+
+    cl::Context context{devices[deviceIndex]};
+    cl::CommandQueue commandQueue{context, devices[deviceIndex], CL_QUEUE_PROFILING_ENABLE};
+
+    cl::Program program{ context, kernelString };
+    program.build();
+    cl::Kernel kernel = cl::Kernel{ program, "inc_buffer" };
+
+    cl::Buffer buf = cl::Buffer{
+        context,
+        CL_MEM_ALLOC_HOST_PTR,
+        sizeof(cl_int) };
+
+    kernel.setArg(0, buf);
+
+    const cl_int zero = 0;
+    commandQueue.enqueueFillBuffer(buf, zero, 0, sizeof(zero));
+
+    std::vector<cl::Event> events;
+    events.reserve(numEvents);
+
+    printf("Enqueueing kernels to create %zu events...\n", numEvents);
+    for (int i = 0; i < numEvents; i++) {
+        cl::Event event;
+        commandQueue.enqueueNDRangeKernel(
+            kernel,
+            cl::NullRange,
+            cl::NDRange{1},
+            cl::NullRange,
+            nullptr,
+            &event);
+        events.push_back(std::move(event));
+    }
+
+    printf("Waiting for %zu kernels to complete...\n", numEvents);
+    commandQueue.finish();
+
+    cl_ulong totalTimeNS = 0;
+    printf("Querying profiling data for %zu events...\n", numEvents);
+
+    auto start = std::chrono::system_clock::now();
+
+    for (const auto& event : events) {
+        totalTimeNS =
+            event.getProfilingInfo<CL_PROFILING_COMMAND_END>() -
+            event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
+    }
+
+    auto end = std::chrono::system_clock::now();
+    std::chrono::duration<float> queryTimeS = end - start;
+    printf("Querying profiling data took %f s (%f us per event)\n",
+        queryTimeS.count(), queryTimeS.count() * 1000000 / numEvents);
+
+    int result = 0;
+    commandQueue.enqueueReadBuffer(
+        buf,
+        CL_TRUE,
+        0,
+        sizeof(result),
+        &result);
+
+    if (result == numEvents) {
+        printf("Success.\n");
+    } else {
+        printf("Unexpected result: %d\n", result);
+    }
+
+    return 0;
+}
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index d0d8ac35..fa891dca 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -78,6 +78,8 @@ add_subdirectory( 06_ndrangekernelfromfile )
 add_subdirectory( 10_queueexperiments )
 add_subdirectory( 16_floatatomics )
 
+add_subdirectory( 99_profbench )
+
 set(BUILD_EXTENSION_SAMPLES TRUE)
 if(NOT TARGET OpenCLExt)
     message(STATUS "Skipping Extension Samples - OpenCL Extension Loader is not found.")

From 00c0f64c1d6842ad51bbeb3cabd34706dea87ce0 Mon Sep 17 00:00:00 2001
From: Ben Ashbaugh
Date: Tue, 10 Mar 2026 15:20:33 -0700
Subject: [PATCH 2/2] minor bugfix and cleanup

Accumulate the per-event execution time instead of overwriting it on
every iteration. Reject an out-of-range device index before it is used
to subscript the device list, matching the existing platform index
check. Time the query loop with a monotonic clock, and use size_t for
the values that are compared against numEvents.

---
Review note: the post-image blob id on the index line below was not
regenerated after the additional fixes were folded into this patch.

 samples/99_profbench/main.cpp | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/samples/99_profbench/main.cpp b/samples/99_profbench/main.cpp
index aabdd639..4c8e3008 100644
--- a/samples/99_profbench/main.cpp
+++ b/samples/99_profbench/main.cpp
@@ -16,9 +16,7 @@ kernel void inc_buffer(global int* dst)
 }
 )CLC";
 
-int main(
-    int argc,
-    char** argv )
+int main(int argc, char** argv )
 {
     int platformIndex = 0;
     int deviceIndex = 0;
@@ -59,6 +57,13 @@ int main(
 
     std::vector<cl::Device> devices;
     platforms[platformIndex].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+
+    // Validate the device index before it is used to subscript the vector.
+    if (deviceIndex < 0 || static_cast<size_t>(deviceIndex) >= devices.size()) {
+        fprintf(stderr, "Error: device index %d is unavailable, only %zu device(s) found.\n",
+            deviceIndex, devices.size());
+        return -1;
+    }
 
     printf("Running on device: %s\n",
         devices[deviceIndex].getInfo<CL_DEVICE_NAME>().c_str() );
@@ -84,7 +89,7 @@ int main(
     events.reserve(numEvents);
 
     printf("Enqueueing kernels to create %zu events...\n", numEvents);
-    for (int i = 0; i < numEvents; i++) {
+    for (size_t i = 0; i < numEvents; i++) {
         cl::Event event;
         commandQueue.enqueueNDRangeKernel(
             kernel,
@@ -102,15 +107,16 @@ int main(
     cl_ulong totalTimeNS = 0;
     printf("Querying profiling data for %zu events...\n", numEvents);
 
-    auto start = std::chrono::system_clock::now();
+    // Use a monotonic clock so wall-clock adjustments cannot skew the interval.
+    auto start = std::chrono::steady_clock::now();
 
     for (const auto& event : events) {
-        totalTimeNS =
+        totalTimeNS +=
             event.getProfilingInfo<CL_PROFILING_COMMAND_END>() -
             event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
     }
 
-    auto end = std::chrono::system_clock::now();
+    auto end = std::chrono::steady_clock::now();
     std::chrono::duration<float> queryTimeS = end - start;
     printf("Querying profiling data took %f s (%f us per event)\n",
         queryTimeS.count(), queryTimeS.count() * 1000000 / numEvents);
@@ -123,7 +129,7 @@ int main(
         sizeof(result),
         &result);
 
-    if (result == numEvents) {
+    if (static_cast<size_t>(result) == numEvents) {
         printf("Success.\n");
     } else {
         printf("Unexpected result: %d\n", result);