Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 397459e

Browse files
committed
Allow OpenCL C device version checks on older platforms
1 parent 9b0f362 commit 397459e
Copy full SHA for 397459e

File tree

Expand file tree / Collapse file tree

10 files changed

+84
-32
lines changed
Filter options
Expand file tree / Collapse file tree

10 files changed

+84
-32
lines changed

‎CMakeLists.txt

Copy file name to clipboardExpand all lines: CMakeLists.txt
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ set(MKL_THREAD_LAYER "Intel OpenMP" CACHE STRING "The thread layer to choose for
5050

5151
find_package(CUDA 9.0)
5252
find_package(cuDNN 4.0)
53-
find_package(OpenCL 3.0)
53+
find_package(OpenCL 1.2)
5454
find_package(OpenGL)
5555
find_package(glad CONFIG QUIET)
5656
find_package(FreeImage)

‎src/backend/opencl/Array.cpp

Copy file name to clipboardExpand all lines: src/backend/opencl/Array.cpp
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ kJITHeuristics passesJitHeuristics(span<Node *> root_nodes) {
307307
}
308308

309309
bool isBufferLimit = getMemoryPressure() >= getMemoryPressureThreshold();
310-
auto platform = getActivePlatform();
310+
auto platform = getActivePlatformVendor();
311311

312312
// The Apple platform can have the nvidia card or the AMD card
313313
bool isIntel = platform == AFCL_PLATFORM_INTEL;

‎src/backend/opencl/CMakeLists.txt

Copy file name to clipboardExpand all lines: src/backend/opencl/CMakeLists.txt
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ file_to_string(
139139
set(opencl_compile_definitions
140140
CL_TARGET_OPENCL_VERSION=300
141141
CL_HPP_TARGET_OPENCL_VERSION=300
142-
CL_HPP_MINIMUM_OPENCL_VERSION=300
142+
CL_HPP_MINIMUM_OPENCL_VERSION=110
143143
CL_HPP_ENABLE_EXCEPTIONS)
144144

145145
include(kernel/scan_by_key/CMakeLists.txt)

‎src/backend/opencl/device_manager.cpp

Copy file name to clipboardExpand all lines: src/backend/opencl/device_manager.cpp
+24-12Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -255,21 +255,33 @@ DeviceManager::DeviceManager()
255255
*mContexts.back(), *devices[i], cl::QueueProperties::None));
256256
mIsGLSharingOn.push_back(false);
257257
mDeviceTypes.push_back(getDeviceTypeEnum(*devices[i]));
258-
mPlatforms.push_back(getPlatformEnum(*devices[i]));
258+
mPlatforms.push_back(
259+
std::make_pair<std::unique_ptr<cl::Platform>, afcl_platform>(
260+
make_unique<cl::Platform>(device_platform, true),
261+
getPlatformEnum(*devices[i])));
259262
mDevices.emplace_back(std::move(devices[i]));
260263

261-
auto device_versions =
262-
mDevices.back()->getInfo<CL_DEVICE_OPENCL_C_ALL_VERSIONS>();
263-
sort(begin(device_versions), end(device_versions),
264-
[](const auto& lhs, const auto& rhs) {
265-
return lhs.version < rhs.version;
266-
});
267-
cl_name_version max_version = device_versions.back();
264+
auto platform_version =
265+
mPlatforms.back().first->getInfo<CL_PLATFORM_VERSION>();
268266
ostringstream options;
269-
options << fmt::format(" -cl-std=CL{}.{}",
270-
CL_VERSION_MAJOR(max_version.version),
271-
CL_VERSION_MINOR(max_version.version))
272-
<< fmt::format(" -D dim_t={}",
267+
if (platform_version.substr(7).c_str()[0] >= '3') {
268+
auto device_versions =
269+
mDevices.back()->getInfo<CL_DEVICE_OPENCL_C_ALL_VERSIONS>();
270+
sort(begin(device_versions), end(device_versions),
271+
[](const auto& lhs, const auto& rhs) {
272+
return lhs.version < rhs.version;
273+
});
274+
cl_name_version max_version = device_versions.back();
275+
options << fmt::format(" -cl-std=CL{}.{}",
276+
CL_VERSION_MAJOR(max_version.version),
277+
CL_VERSION_MINOR(max_version.version));
278+
} else {
279+
auto device_version =
280+
mDevices.back()->getInfo<CL_DEVICE_OPENCL_C_VERSION>();
281+
options << fmt::format(" -cl-std=CL{}",
282+
device_version.substr(9, 3));
283+
}
284+
options << fmt::format(" -D dim_t={}",
273285
dtype_traits<dim_t>::getName());
274286
#ifdef AF_WITH_FAST_MATH
275287
options << " -cl-fast-relaxed-math";

‎src/backend/opencl/device_manager.hpp

Copy file name to clipboardExpand all lines: src/backend/opencl/device_manager.hpp
+7-2Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
#pragma once
1111

12+
#include <af/opencl.h>
13+
1214
#include <memory>
1315
#include <mutex>
1416
#include <string>
@@ -129,7 +131,9 @@ class DeviceManager {
129131

130132
friend int getActiveDeviceType();
131133

132-
friend int getActivePlatform();
134+
friend cl::Platform& getActivePlatform();
135+
136+
friend afcl::platform getActivePlatformVendor();
133137

134138
public:
135139
static const int MAX_DEVICES = 32;
@@ -163,7 +167,8 @@ class DeviceManager {
163167
std::vector<bool> mIsGLSharingOn;
164168
std::vector<std::string> mBaseBuildFlags;
165169
std::vector<int> mDeviceTypes;
166-
std::vector<int> mPlatforms;
170+
std::vector<std::pair<std::unique_ptr<cl::Platform>, afcl::platform>>
171+
mPlatforms;
167172
unsigned mUserDeviceOffset;
168173

169174
std::unique_ptr<graphics::ForgeManager> fgMngr;

‎src/backend/opencl/kernel/flood_fill.hpp

Copy file name to clipboardExpand all lines: src/backend/opencl/kernel/flood_fill.hpp
+2-2Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ void floodFill(Param out, const Param image, const Param seedsx,
8383
DefineKeyValue(LMEM_WIDTH, (THREADS_X + 2 * RADIUS)),
8484
DefineKeyValue(LMEM_HEIGHT, (THREADS_Y + 2 * RADIUS)),
8585
DefineKeyValue(GROUP_SIZE, (THREADS_Y * THREADS_X)),
86-
DefineKeyValue(AF_IS_PLATFORM_NVIDIA,
87-
(int)(AFCL_PLATFORM_NVIDIA == getActivePlatform())),
86+
DefineKeyValue(AF_IS_PLATFORM_NVIDIA, (int)(AFCL_PLATFORM_NVIDIA ==
87+
getActivePlatformVendor())),
8888
getTypeBuildDefinition<T>()};
8989

9090
auto floodStep =

‎src/backend/opencl/magma/getrs.cpp

Copy file name to clipboardExpand all lines: src/backend/opencl/magma/getrs.cpp
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ magma_int_t magma_getrs_gpu(magma_trans_t trans, magma_int_t n,
165165
: (trans == MagmaTrans ? OPENCL_BLAS_TRANS
166166
: OPENCL_BLAS_CONJ_TRANS);
167167

168-
bool cond = opencl::getActivePlatform() == AFCL_PLATFORM_NVIDIA;
168+
bool cond = opencl::getActivePlatformVendor() == AFCL_PLATFORM_NVIDIA;
169169
cl_mem dAT = 0;
170170
if (nrhs > 1 && cond) {
171171
magma_malloc<Ty>(&dAT, n * n);

‎src/backend/opencl/platform.cpp

Copy file name to clipboardExpand all lines: src/backend/opencl/platform.cpp
+42-9Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -254,15 +254,26 @@ int getActiveDeviceType() {
254254
return devMngr.mDeviceTypes[get<1>(devId)];
255255
}
256256

257-
int getActivePlatform() {
257+
cl::Platform& getActivePlatform() {
258258
device_id_t& devId = tlocalActiveDeviceId();
259259

260260
DeviceManager& devMngr = DeviceManager::getInstance();
261261

262262
common::lock_guard_t lock(devMngr.deviceMutex);
263263

264-
return devMngr.mPlatforms[get<1>(devId)];
264+
return *devMngr.mPlatforms[get<1>(devId)].first;
265265
}
266+
267+
afcl::platform getActivePlatformVendor() {
268+
device_id_t& devId = tlocalActiveDeviceId();
269+
270+
DeviceManager& devMngr = DeviceManager::getInstance();
271+
272+
common::lock_guard_t lock(devMngr.deviceMutex);
273+
274+
return devMngr.mPlatforms[get<1>(devId)].second;
275+
}
276+
266277
const Context& getContext() {
267278
device_id_t& devId = tlocalActiveDeviceId();
268279

@@ -470,12 +481,17 @@ void addDeviceContext(cl_device_id dev, cl_context ctx, cl_command_queue que) {
470481
auto tQueue =
471482
(que == NULL ? make_unique<cl::CommandQueue>(*tContext, *tDevice)
472483
: make_unique<cl::CommandQueue>(que, true));
473-
devMngr.mPlatforms.push_back(getPlatformEnum(*tDevice));
474484
// FIXME: add OpenGL Interop for user provided contexts later
475485
devMngr.mIsGLSharingOn.push_back(false);
476486
devMngr.mDeviceTypes.push_back(
477487
static_cast<int>(tDevice->getInfo<CL_DEVICE_TYPE>()));
478488

489+
auto device_platform = tDevice->getInfo<CL_DEVICE_PLATFORM>();
490+
devMngr.mPlatforms.push_back(
491+
std::make_pair<std::unique_ptr<cl::Platform>, afcl_platform>(
492+
make_unique<cl::Platform>(device_platform, true),
493+
getPlatformEnum(*tDevice)));
494+
479495
devMngr.mDevices.push_back(move(tDevice));
480496
devMngr.mContexts.push_back(move(tContext));
481497
devMngr.mQueues.push_back(move(tQueue));
@@ -487,12 +503,29 @@ void addDeviceContext(cl_device_id dev, cl_context ctx, cl_command_queue que) {
487503
[](const auto& lhs, const auto& rhs) {
488504
return lhs.version < rhs.version;
489505
});
490-
cl_name_version max_version = device_versions.back();
506+
507+
auto platform_version =
508+
devMngr.mPlatforms.back().first->getInfo<CL_PLATFORM_VERSION>();
491509
ostringstream options;
492-
options << fmt::format(" -cl-std=CL{}.{}",
493-
CL_VERSION_MAJOR(max_version.version),
494-
CL_VERSION_MINOR(max_version.version))
495-
<< fmt::format(" -D dim_t={}", dtype_traits<dim_t>::getName());
510+
if (platform_version.substr(7).c_str()[0] >= '3') {
511+
auto device_versions =
512+
devMngr.mDevices.back()
513+
->getInfo<CL_DEVICE_OPENCL_C_ALL_VERSIONS>();
514+
sort(begin(device_versions), end(device_versions),
515+
[](const auto& lhs, const auto& rhs) {
516+
return lhs.version < rhs.version;
517+
});
518+
cl_name_version max_version = device_versions.back();
519+
options << fmt::format(" -cl-std=CL{}.{}",
520+
CL_VERSION_MAJOR(max_version.version),
521+
CL_VERSION_MINOR(max_version.version));
522+
} else {
523+
auto device_version =
524+
devMngr.mDevices.back()->getInfo<CL_DEVICE_OPENCL_C_VERSION>();
525+
options << fmt::format(" -cl-std=CL{}",
526+
device_version.substr(9, 3));
527+
}
528+
options << fmt::format(" -D dim_t={}", dtype_traits<dim_t>::getName());
496529
#ifdef AF_WITH_FAST_MATH
497530
options << " -cl-fast-relaxed-math";
498531
#endif
@@ -707,7 +740,7 @@ af_err afcl_get_device_type(afcl_device_type* res) {
707740

708741
af_err afcl_get_platform(afcl_platform* res) {
709742
try {
710-
*res = static_cast<afcl_platform>(getActivePlatform());
743+
*res = static_cast<afcl_platform>(getActivePlatformVendor());
711744
}
712745
CATCHALL;
713746
return AF_SUCCESS;

‎src/backend/opencl/platform.hpp

Copy file name to clipboardExpand all lines: src/backend/opencl/platform.hpp
+3-1Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,9 @@ bool synchronize_calls();
147147

148148
int getActiveDeviceType();
149149

150-
int getActivePlatform();
150+
cl::Platform& getActivePlatform();
151+
152+
afcl::platform getActivePlatformVendor();
151153

152154
bool& evalFlag();
153155

‎src/backend/opencl/solve.cpp

Copy file name to clipboardExpand all lines: src/backend/opencl/solve.cpp
+2-2Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ Array<T> leastSquares(const Array<T> &a, const Array<T> &b) {
229229
A.strides()[1], 1, (*dT)(),
230230
tmp.getOffset() + NB * MN, NB, 0, queue);
231231

232-
if (getActivePlatform() == AFCL_PLATFORM_NVIDIA) {
232+
if (getActivePlatformVendor() == AFCL_PLATFORM_NVIDIA) {
233233
Array<T> AT = transpose<T>(A, true);
234234
Buffer *AT_buf = AT.get();
235235
OPENCL_BLAS_CHECK(gpu_blas_trsm(
@@ -268,7 +268,7 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b,
268268
cl_event event = 0;
269269
cl_command_queue queue = getQueue()();
270270

271-
if (getActivePlatform() == AFCL_PLATFORM_NVIDIA &&
271+
if (getActivePlatformVendor() == AFCL_PLATFORM_NVIDIA &&
272272
(options & AF_MAT_UPPER)) {
273273
Array<T> AT = transpose<T>(A, true);
274274

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxied and sanitized view of the page; visit the original site.