Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 62e5168

Browse filesBrowse files
committed
[AMDGPU] Update code object metadata for kernarg preload
Tracks the registers that explicit and hidden arguments are preloaded to with new code object metadata. IR arguments may be split across multiple parts by isel, and SGPR tuple alignment means that an argument may be spread across multiple registers. To support this, some of the utilities for hidden kernel arguments are moved to `AMDGPUArgumentUsageInfo.h`. Additional bookkeeping is also needed for tracking purposes.
1 parent 8f65519 commit 62e5168
Copy full SHA for 62e5168
Expand file treeCollapse file tree

21 files changed

+951
-176
lines changed

‎llvm/include/llvm/Support/AMDGPUMetadata.h

Copy file name to clipboardExpand all lines: llvm/include/llvm/Support/AMDGPUMetadata.h
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ constexpr uint32_t VersionMinorV5 = 2;
4747
/// HSA metadata major version for code object V6.
4848
constexpr uint32_t VersionMajorV6 = 1;
4949
/// HSA metadata minor version for code object V6.
50-
constexpr uint32_t VersionMinorV6 = 2;
50+
constexpr uint32_t VersionMinorV6 = 3;
5151

5252
/// Old HSA metadata beginning assembler directive for V2. This is only used for
5353
/// diagnostics now.

‎llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+34Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@
1616
#include "llvm/Support/raw_ostream.h"
1717

1818
using namespace llvm;
19+
using namespace llvm::KernArgPreload;
1920

2021
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
2122

2223
INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
2324
"Argument Register Usage Information Storage", false, true)
2425

26+
constexpr HiddenArgInfo HiddenArgUtils::HiddenArgs[END_HIDDEN_ARGS];
27+
2528
void ArgDescriptor::print(raw_ostream &OS,
2629
const TargetRegisterInfo *TRI) const {
2730
if (!isSet()) {
@@ -176,6 +179,37 @@ AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
176179
return AI;
177180
}
178181

182+
// Collects the preload descriptors of every part recorded for the IR argument
// at index ArgIdx. The result is ordered by ascending PartIdx and is empty when
// no entry in PreloadKernArgs refers to ArgIdx.
SmallVector<const KernArgPreloadDescriptor *, 4>
AMDGPUFunctionArgInfo::getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const {
  SmallVector<const KernArgPreloadDescriptor *, 4> Parts;
  for (const auto &Entry : PreloadKernArgs) {
    const KernArgPreloadDescriptor &Desc = Entry.second;
    if (Desc.OrigArgIdx != ArgIdx)
      continue;
    Parts.push_back(&Desc);
  }

  // Sort so callers see the parts in ascending PartIdx order.
  auto ByPartIdx = [](const KernArgPreloadDescriptor *LHS,
                      const KernArgPreloadDescriptor *RHS) {
    return LHS->PartIdx < RHS->PartIdx;
  };
  llvm::stable_sort(Parts, ByPartIdx);

  return Parts;
}
197+
198+
std::optional<const KernArgPreloadDescriptor *>
199+
AMDGPUFunctionArgInfo::getHiddenArgPreloadDescriptor(HiddenArg HA) const {
200+
assert(HA < END_HIDDEN_ARGS);
201+
202+
auto HiddenArgIt = PreloadHiddenArgsIndexMap.find(HA);
203+
if (HiddenArgIt == PreloadHiddenArgsIndexMap.end())
204+
return std::nullopt;
205+
206+
auto KernArgIt = PreloadKernArgs.find(HiddenArgIt->second);
207+
if (KernArgIt == PreloadKernArgs.end())
208+
return std::nullopt;
209+
210+
return &KernArgIt->second;
211+
}
212+
179213
const AMDGPUFunctionArgInfo &
180214
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
181215
auto I = ArgInfoMap.find(&F);

‎llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+87-4Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111

1212
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1313
#include "llvm/ADT/DenseMap.h"
14+
#include "llvm/Analysis/ValueTracking.h"
1415
#include "llvm/CodeGen/Register.h"
16+
#include "llvm/IR/LLVMContext.h"
17+
#include "llvm/IR/Type.h"
1518
#include "llvm/Pass.h"
1619

1720
namespace llvm {
@@ -95,11 +98,78 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
9598
return OS;
9699
}
97100

98-
struct KernArgPreloadDescriptor : public ArgDescriptor {
99-
KernArgPreloadDescriptor() {}
100-
SmallVector<MCRegister> Regs;
101+
namespace KernArgPreload {
102+
103+
// Hidden kernel arguments tracked for kernarg preloading. The enumerator
// values are used as indices into HiddenArgUtils::HiddenArgs, so the order here
// must match that table. END_HIDDEN_ARGS is the number of real entries and is
// not itself a hidden argument.
enum HiddenArg {
  HIDDEN_BLOCK_COUNT_X = 0,
  HIDDEN_BLOCK_COUNT_Y,
  HIDDEN_BLOCK_COUNT_Z,

  HIDDEN_GROUP_SIZE_X,
  HIDDEN_GROUP_SIZE_Y,
  HIDDEN_GROUP_SIZE_Z,

  HIDDEN_REMAINDER_X,
  HIDDEN_REMAINDER_Y,
  HIDDEN_REMAINDER_Z,

  END_HIDDEN_ARGS
};
102115

116+
// Describes one hidden argument: where it lives relative to the implicitarg
// pointer, how large it is, and what it is called.
struct HiddenArgInfo {
  // Byte offset from the location in the kernarg segment that the implicitarg
  // pointer points to.
  uint8_t Offset;
  // Size of the hidden argument, in bytes.
  uint8_t Size;
  // Name of the hidden argument in the kernel signature.
  const char *Name;
};
126+
127+
struct HiddenArgUtils {
128+
static constexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {
129+
{0, 4, "_hidden_block_count_x"}, {4, 4, "_hidden_block_count_y"},
130+
{8, 4, "_hidden_block_count_z"}, {12, 2, "_hidden_group_size_x"},
131+
{14, 2, "_hidden_group_size_y"}, {16, 2, "_hidden_group_size_z"},
132+
{18, 2, "_hidden_remainder_x"}, {20, 2, "_hidden_remainder_y"},
133+
{22, 2, "_hidden_remainder_z"}};
134+
135+
static HiddenArg getHiddenArgFromOffset(unsigned Offset) {
136+
for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
137+
if (HiddenArgs[I].Offset == Offset)
138+
return static_cast<HiddenArg>(I);
139+
140+
return END_HIDDEN_ARGS;
141+
}
142+
143+
static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
144+
if (HA < END_HIDDEN_ARGS)
145+
return static_cast<Type *>(Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8));
146+
147+
llvm_unreachable("Unexpected hidden argument.");
148+
}
149+
150+
static const char *getHiddenArgName(HiddenArg HA) {
151+
if (HA < END_HIDDEN_ARGS) {
152+
return HiddenArgs[HA].Name;
153+
}
154+
llvm_unreachable("Unexpected hidden argument.");
155+
}
156+
};
157+
158+
struct KernArgPreloadDescriptor {
159+
// Id of the original argument in the IR kernel function argument list.
160+
unsigned OrigArgIdx = 0;
161+
162+
// If this IR argument was split into multiple parts, this is the index of the
163+
// part in the original argument.
164+
unsigned PartIdx = 0;
165+
166+
// The registers that the argument is preloaded into. The argument may be
167+
// split accross multilpe registers.
168+
SmallVector<MCRegister, 2> Regs;
169+
};
170+
171+
} // namespace KernArgPreload
172+
103173
struct AMDGPUFunctionArgInfo {
104174
// clang-format off
105175
enum PreloadedValue {
@@ -161,14 +231,27 @@ struct AMDGPUFunctionArgInfo {
161231
ArgDescriptor WorkItemIDZ;
162232

163233
// Map the index of preloaded kernel arguments to its descriptor.
164-
SmallDenseMap<int, KernArgPreloadDescriptor> PreloadKernArgs{};
234+
SmallDenseMap<int, KernArgPreload::KernArgPreloadDescriptor>
235+
PreloadKernArgs{};
236+
// Map hidden argument to the index of its descriptor.
237+
SmallDenseMap<KernArgPreload::HiddenArg, int> PreloadHiddenArgsIndexMap{};
165238
// The first user SGPR allocated for kernarg preloading.
166239
Register FirstKernArgPreloadReg;
167240

168241
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
169242
getPreloadedValue(PreloadedValue Value) const;
170243

171244
static AMDGPUFunctionArgInfo fixedABILayout();
245+
246+
// Returns preload argument descriptors for an IR argument index. Isel may
247+
// split IR arguments into multiple parts, the return vector holds all parts
248+
// associated with an IR argument in the kernel signature.
249+
SmallVector<const KernArgPreload::KernArgPreloadDescriptor *, 4>
250+
getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const;
251+
252+
// Returns the hidden argument's `KernArgPreloadDescriptor` if it is preloaded.
253+
std::optional<const KernArgPreload::KernArgPreloadDescriptor *>
254+
getHiddenArgPreloadDescriptor(KernArgPreload::HiddenArg HA) const;
172255
};
173256

174257
class AMDGPUArgumentUsageInfo : public ImmutablePass {

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.