Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
81 lines (67 loc) · 2.22 KB

File metadata and controls

81 lines (67 loc) · 2.22 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Data Parallel Control (dpctl)
#
# Copyright 2020-2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import dpctl
import dpctl.tensor as dpt
from dpctl import SyclTimer
def matmul(m1, m2):
    """Multiply two 2-D arrays via a broadcasted product and a reduction.

    Both operands must be two-dimensional and the number of columns of
    ``m1`` must equal the number of rows of ``m2``; otherwise an
    ``AssertionError`` is raised.

    The full three-index array of elementwise products is materialised
    before it is reduced, so this is a deliberately naive implementation.
    """
    assert m1.ndim == 2
    assert m2.ndim == 2
    assert m1.shape[1] == m2.shape[0]
    # lhs[i, 0, k] = m1[i, k]
    lhs = m1[:, dpt.newaxis, :]
    # rhs[0, j, k] = m2[k, j]
    m2_transposed = dpt.permute_dims(m2, (1, 0))
    rhs = m2_transposed[dpt.newaxis, :, :]
    # Broadcasting gives pairwise[i, j, k] = m1[i, k] * m2[k, j]
    pairwise = lhs * rhs
    # Contract the shared index k, which is the last axis.
    return dpt.sum(pairwise, axis=-1)
# Side length of the square matrices being multiplied.
n = 500
# Number of timed repetitions; used by both the banner and the loop.
n_repeats = 5

# Profiling must be enabled on the queue so that SyclTimer can report
# device-side timings in addition to host wall-clock time.
try:
    q = dpctl.SyclQueue(property="enable_profiling")
except dpctl.SyclQueueCreationError:
    print(
        "Skipping the example, as dpctl.SyclQueue targeting "
        "default device could not be created"
    )
    # Raise SystemExit directly: the bare ``exit`` helper is injected by the
    # ``site`` module for interactive use and may be missing (e.g. with
    # ``python -S`` or in embedded interpreters).
    raise SystemExit(0)

# First operand: 0, 1, ..., n*n - 1 laid out as an n-by-n float32 matrix.
a_flat = dpt.arange(n * n, dtype=dpt.float32, sycl_queue=q)
a = dpt.reshape(a_flat, (n, n))

# Second operand: random float32 values generated on the host with NumPy,
# then copied into an array associated with the same queue.
b_rand = np.random.random(n * n).astype(np.float32)
b_flat = dpt.asarray(b_rand, dtype=dpt.float32, sycl_queue=q)
b = dpt.reshape(b_flat, (n, n))

wall_times = []
device_times = []

print(
    f"Computing naive matrix multiplication of two {n} by {n} matrices "
    f"on {q.sycl_device.name}, repeating {n_repeats} times."
)
print()
for _ in range(n_repeats):
    # A fresh timer per repetition, so each measurement is independent.
    # NOTE(review): time_scale=1 presumably leaves timings unscaled --
    # confirm against the SyclTimer documentation.
    timer = SyclTimer(time_scale=1)
    with timer(q):
        a_matmul_b = matmul(a, b)

    # timer.dt unpacks into the host-side and device-side durations.
    host_time, device_time = timer.dt
    wall_times.append(host_time)
    device_times.append(device_time)

# Validate the result of the last repetition against NumPy on the host.
c = dpt.asnumpy(a_matmul_b)
cc = np.dot(dpt.asnumpy(a), dpt.asnumpy(b))

print("Wall time: ", wall_times, "\nDevice time: ", device_times)
print()
print(
    "Accuracy test: passed."
    if np.allclose(c, cc)
    else (f"Accuracy test: FAILED. \n Discrepancy = {np.max(np.abs(c-cc))}")
)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.