Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 36bdf55

Browse filesBrowse files
committed
[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn
Clang generates library calls for __builtin_* functions, which is a problem for GPU targets that cannot handle such calls. This patch provides device implementations for __builtin_logb and __builtin_scalbn by emitting LLVM IR directly. The IR is emitted only when FP exceptions are disabled and math-errno is unset.
1 parent 0d19efa commit 36bdf55
Copy full SHA for 36bdf55

File tree

3 files changed

+1150
-3
lines changed
Filter options

3 files changed

+1150
-3
lines changed

‎clang/lib/CodeGen/CGBuiltin.cpp

Copy file name to clipboardExpand all lines: clang/lib/CodeGen/CGBuiltin.cpp
+32-3Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,33 @@ using namespace clang;
4343
using namespace CodeGen;
4444
using namespace llvm;
4545

46+
/// Some builtins do not have library implementation on some targets and
47+
/// are instead emitted as LLVM IRs by some target builtin emitters.
48+
/// FIXME: Remove this when library support is added
49+
static bool shouldEmitBuiltinAsIR(unsigned BuiltinID,
50+
const Builtin::Context &BI,
51+
const CodeGenFunction &CGF) {
52+
if (!CGF.CGM.getLangOpts().MathErrno &&
53+
CGF.CurFPFeatures.getExceptionMode() ==
54+
LangOptions::FPExceptionModeKind::FPE_Ignore &&
55+
CGF.getTarget().getTriple().isAMDGCN()) {
56+
switch (BuiltinID) {
57+
default:
58+
return false;
59+
case Builtin::BIlogbf:
60+
case Builtin::BI__builtin_logbf:
61+
case Builtin::BIlogb:
62+
case Builtin::BI__builtin_logb:
63+
case Builtin::BIscalbnf:
64+
case Builtin::BI__builtin_scalbnf:
65+
case Builtin::BIscalbn:
66+
case Builtin::BI__builtin_scalbn:
67+
return true;
68+
}
69+
}
70+
return false;
71+
}
72+
4673
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
4774
unsigned BuiltinID, const CallExpr *E,
4875
ReturnValueSlot ReturnValue,
@@ -2414,7 +2441,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
24142441
// disabled.
24152442
// Math intrinsics are generated only when math-errno is disabled. Any pragmas
24162443
// or attributes that affect math-errno should prevent or allow math
2417-
// intrincs to be generated. Intrinsics are generated:
2444+
// intrinsics to be generated. Intrinsics are generated:
24182445
// 1- In fast math mode, unless math-errno is overridden
24192446
// via '#pragma float_control(precise, on)', or via an
24202447
// 'attribute__((optnone))'.
@@ -5999,13 +6026,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59996026
// If this is an alias for a lib function (e.g. __builtin_sin), emit
60006027
// the call using the normal call path, but using the unmangled
60016028
// version of the function name.
6002-
if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6029+
const auto &BI = getContext().BuiltinInfo;
6030+
if (!shouldEmitBuiltinAsIR(BuiltinID, BI, *this) &&
6031+
BI.isLibFunction(BuiltinID))
60036032
return emitLibraryCall(*this, FD, E,
60046033
CGM.getBuiltinLibFunction(FD, BuiltinID));
60056034

60066035
// If this is a predefined lib function (e.g. malloc), emit the call
60076036
// using exactly the normal call path.
6008-
if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6037+
if (BI.isPredefinedLibFunction(BuiltinID))
60096038
return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
60106039

60116040
// Check that a call to a target specific builtin has the correct target

‎clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

Copy file name to clipboardExpand all lines: clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+73Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,27 @@ using namespace CodeGen;
2323
using namespace llvm;
2424

2525
namespace {
26+
27+
// Has second type mangled argument.
28+
static Value *
29+
emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
30+
Intrinsic::ID IntrinsicID,
31+
Intrinsic::ID ConstrainedIntrinsicID) {
32+
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
33+
llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
34+
35+
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
36+
if (CGF.Builder.getIsFPConstrained()) {
37+
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
38+
{Src0->getType(), Src1->getType()});
39+
return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
40+
}
41+
42+
Function *F =
43+
CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
44+
return CGF.Builder.CreateCall(F, {Src0, Src1});
45+
}
46+
2647
// If \p E is not null pointer, insert address space cast to match return
2748
// type of \p E if necessary.
2849
Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
@@ -1142,6 +1163,58 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
11421163
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
11431164
return emitBuiltinWithOneOverloadedType<2>(
11441165
*this, E, Intrinsic::amdgcn_s_prefetch_data);
1166+
case Builtin::BIlogbf:
case Builtin::BI__builtin_logbf: {
  // Inline expansion of logbf(x) built on llvm.frexp:
  //   x == 0 (either sign) -> -inf
  //   |x| == inf or x NaN  -> fabs(x)   (the ordered compare is false for NaN)
  //   otherwise            -> (float)(frexp exponent - 1)
  Value *Src0 = EmitScalarExpr(E->getArg(0));
  Function *FrExpFunc = CGM.getIntrinsic(
      Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
  CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
  // Element 1 of the frexp result struct is the integer exponent.
  Value *Exp = Builder.CreateExtractValue(FrExp, 1);
  Value *Add =
      Builder.CreateAdd(Exp, ConstantInt::getSigned(Exp->getType(), -1), "",
                        /*HasNUW=*/false, /*HasNSW=*/true);
  Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getFloatTy());
  // Build fabs from the operand that was already emitted. Routing this through
  // emitBuiltinWithOneOverloadedType would emit E->getArg(0) a second time and
  // duplicate any side effects of the argument expression.
  Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
  Value *FCmpONE = Builder.CreateFCmpONE(
      Fabs, ConstantFP::getInfinity(Builder.getFloatTy()));
  Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
  Value *FCmpOEQ =
      Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getFloatTy()));
  Value *Sel2 = Builder.CreateSelect(
      FCmpOEQ,
      ConstantFP::getInfinity(Builder.getFloatTy(), /*Negative=*/true),
      Sel1);
  return Sel2;
}
1189+
case Builtin::BIlogb:
case Builtin::BI__builtin_logb: {
  // Inline expansion of logb(x) built on llvm.frexp:
  //   x == 0 (either sign) -> -inf
  //   |x| == inf or x NaN  -> fabs(x)   (the ordered compare is false for NaN)
  //   otherwise            -> (double)(frexp exponent - 1)
  Value *Src0 = EmitScalarExpr(E->getArg(0));
  Function *FrExpFunc = CGM.getIntrinsic(
      Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
  CallInst *FrExp = Builder.CreateCall(FrExpFunc, Src0);
  // Element 1 of the frexp result struct is the integer exponent.
  Value *Exp = Builder.CreateExtractValue(FrExp, 1);
  Value *Add =
      Builder.CreateAdd(Exp, ConstantInt::getSigned(Exp->getType(), -1), "",
                        /*HasNUW=*/false, /*HasNSW=*/true);
  Value *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
  // Build fabs from the operand that was already emitted. Routing this through
  // emitBuiltinWithOneOverloadedType would emit E->getArg(0) a second time and
  // duplicate any side effects of the argument expression.
  Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
  Value *FCmpONE = Builder.CreateFCmpONE(
      Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
  Value *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
  Value *FCmpOEQ =
      Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
  Value *Sel2 = Builder.CreateSelect(
      FCmpOEQ,
      ConstantFP::getInfinity(Builder.getDoubleTy(), /*Negative=*/true),
      Sel1);
  return Sel2;
}
1212+
case Builtin::BIscalbnf:
1213+
case Builtin::BI__builtin_scalbnf:
1214+
case Builtin::BIscalbn:
1215+
case Builtin::BI__builtin_scalbn:
1216+
return emitBinaryExpMaybeConstrainedFPBuiltin(
1217+
*this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
11451218
default:
11461219
return nullptr;
11471220
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.