Commit a7f2b79

[AMDGPU][clang] provide device implementation for __builtin_logb and __builtin_scalbn

Clang generates library calls for __builtin_* functions, which can be a problem for GPUs that cannot handle them. This patch provides device implementations for __builtin_logb and __builtin_scalbn by emitting LLVM IR directly.
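Concretely, before this patch compiling the following for an amdgcn target lowered the builtin to a call to the libm symbol logb, which the device cannot resolve; after it, the builtin expands inline to LLVM intrinsics. A minimal sketch (the function name f is illustrative; the exact IR appears in the new test below):

// t.c -- compile with: clang -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - t.c
double f(double x) {
  return __builtin_logb(x); // before: call double @logb(double %x)
                            // after:  @llvm.frexp.f64.i32 plus selects, no call
}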
1 parent 0d19efa commit a7f2b79

3 files changed: 308 additions, 2 deletions

‎clang/lib/CodeGen/CGBuiltin.cpp

25 additions, 2 deletions
@@ -43,6 +43,27 @@ using namespace clang;
 using namespace CodeGen;
 using namespace llvm;
 
+/// Some builtins do not have library implementation on some targets and
+/// are instead emitted as LLVM IRs by some target builtin emitters.
+/// FIXME: Remove this when library support is added
+static bool shouldEmitBuiltinAsIR(unsigned BuiltinID,
+                                  const Builtin::Context &BI,
+                                  const TargetInfo &TI) {
+  if (BI.isConstWithoutErrnoAndExceptions(BuiltinID) &&
+      TI.getTriple().isAMDGCN()) {
+    switch (BuiltinID) {
+    default:
+      return false;
+    case Builtin::BIlogb:
+    case Builtin::BI__builtin_logb:
+    case Builtin::BIscalbn:
+    case Builtin::BI__builtin_scalbn:
+      return true;
+    }
+  }
+  return false;
+}
+
 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
                                         unsigned BuiltinID, const CallExpr *E,
                                         ReturnValueSlot ReturnValue,

@@ -5999,13 +6020,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
   // the call using the normal call path, but using the unmangled
   // version of the function name.
-  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
+  const auto &BI = getContext().BuiltinInfo;
+  if (!shouldEmitBuiltinAsIR(BuiltinID, BI, getTarget()) &&
+      BI.isLibFunction(BuiltinID))
     return emitLibraryCall(*this, FD, E,
                            CGM.getBuiltinLibFunction(FD, BuiltinID));
 
   // If this is a predefined lib function (e.g. malloc), emit the call
   // using exactly the normal call path.
-  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
+  if (BI.isPredefinedLibFunction(BuiltinID))
     return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
 
   // Check that a call to a target specific builtin has the correct target
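The net effect is that, for the builtins listed in shouldEmitBuiltinAsIR, EmitBuiltinExpr skips its early "emit as library call" return and control falls through to the target-specific emitter. A simplified sketch of the resulting flow (abridged, not the verbatim Clang code):

// CodeGenFunction::EmitBuiltinExpr, abridged:
const auto &BI = getContext().BuiltinInfo;
if (!shouldEmitBuiltinAsIR(BuiltinID, BI, getTarget()) &&
    BI.isLibFunction(BuiltinID))
  return emitLibraryCall(*this, FD, E,
                         CGM.getBuiltinLibFunction(FD, BuiltinID));
// ...
// Otherwise execution reaches the per-target hook, where
// EmitAMDGPUBuiltinExpr (below) now handles logb/scalbn inline.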

‎clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp

46 additions, 0 deletions
@@ -23,6 +23,27 @@ using namespace CodeGen;
 using namespace llvm;
 
 namespace {
+
+// Has second type mangled argument.
+static Value *
+emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E,
+                                       Intrinsic::ID IntrinsicID,
+                                       Intrinsic::ID ConstrainedIntrinsicID) {
+  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
+
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
+  if (CGF.Builder.getIsFPConstrained()) {
+    Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
+                                       {Src0->getType(), Src1->getType()});
+    return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
+  }
+
+  Function *F =
+      CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
+  return CGF.Builder.CreateCall(F, {Src0, Src1});
+}
+
 // If \p E is not null pointer, insert address space cast to match return
 // type of \p E if necessary.
 Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,

@@ -1142,6 +1163,31 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
     return emitBuiltinWithOneOverloadedType<2>(
         *this, E, Intrinsic::amdgcn_s_prefetch_data);
+  case Builtin::BIlogb:
+  case Builtin::BI__builtin_logb: {
+    auto *Src0 = EmitScalarExpr(E->getArg(0));
+    auto *FrExpFunc = CGM.getIntrinsic(
+        Intrinsic::frexp, {Src0->getType(), Builder.getInt32Ty()});
+    auto *FrExp = Builder.CreateCall(FrExpFunc, Src0);
+    auto *Exp = Builder.CreateExtractValue(FrExp, 1);
+    auto *Add = Builder.CreateAdd(
+        Exp, ConstantInt::getSigned(Exp->getType(), -1), "", false, true);
+    auto *SIToFP = Builder.CreateSIToFP(Add, Builder.getDoubleTy());
+    auto *Fabs = emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs);
+    auto *FCmpONE = Builder.CreateFCmpONE(
+        Fabs, ConstantFP::getInfinity(Builder.getDoubleTy()));
+    auto *Sel1 = Builder.CreateSelect(FCmpONE, SIToFP, Fabs);
+    auto *FCmpOEQ =
+        Builder.CreateFCmpOEQ(Src0, ConstantFP::getZero(Builder.getDoubleTy()));
+    auto *Sel2 = Builder.CreateSelect(
+        FCmpOEQ, ConstantFP::getInfinity(Builder.getDoubleTy(), /*Neg*/ true),
+        Sel1);
+    return Sel2;
+  }
+  case Builtin::BIscalbn:
+  case Builtin::BI__builtin_scalbn:
+    return emitBinaryExpMaybeConstrainedFPBuiltin(
+        *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp);
   default:
     return nullptr;
   }
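The logb lowering relies on the identity that for finite, nonzero x, frexp decomposes x = m * 2^e with 0.5 <= |m| < 1, so logb(x) = e - 1; two selects then patch up the special cases logb(+/-inf) = +inf and logb(+/-0) = -inf. A host-side C reference model of the emitted select chain (logb_ref is a hypothetical name, for illustration only):

#include <math.h>

double logb_ref(double x) {
  int e;
  (void)frexp(x, &e);                // x = m * 2^e, 0.5 <= |m| < 1
  double r = (double)(e - 1);        // add nsw -1; sitofp
  double ax = fabs(x);               // @llvm.fabs.f64
  r = (ax != INFINITY) ? r : ax;     // fcmp one + select: logb(+/-inf) = +inf
  return (x == 0.0) ? -INFINITY : r; // fcmp oeq + select: logb(+/-0) = -inf
}

scalbn needs no such expansion: scalbn(x, n) is x * 2^n when FLT_RADIX is 2, which is exactly ldexp, so the builtin maps directly onto llvm.ldexp (or its constrained variant under strict FP semantics).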

‎clang/test/CodeGen/logb_scalbn.c

237 additions, 0 deletions
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

// CHECK-LABEL: define dso_local void @test_logb(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double 0x40301999A0000000)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
// CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to double
// CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fabs.f64(double 0x40301999A0000000)
// CHECK-NEXT: [[TMP5:%.*]] = fcmp one double [[TMP4]], 0x7FF0000000000000
// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP4]]
// CHECK-NEXT: [[TMP7:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP6]]
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP7]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_logb() {
  float D1 = __builtin_logb(16.1f);
}
// CHECK-LABEL: define dso_local void @test_logb_var(
// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store float [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
// CHECK-NEXT: [[TMP1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[CONV]])
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP1]], 1
// CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1
// CHECK-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[CONV1:%.*]] = fpext float [[TMP5]] to double
// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fabs.f64(double [[CONV1]])
// CHECK-NEXT: [[TMP7:%.*]] = fcmp one double [[TMP6]], 0x7FF0000000000000
// CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP6]]
// CHECK-NEXT: [[TMP9:%.*]] = fcmp oeq double [[CONV]], 0.000000e+00
// CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0xFFF0000000000000, double [[TMP8]]
// CHECK-NEXT: [[CONV2:%.*]] = fptrunc double [[TMP10]] to float
// CHECK-NEXT: store float [[CONV2]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_logb_var(float a) {
  float D1 = __builtin_logb(a);
}
// CHECK-LABEL: define dso_local void @test_logb_d(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double 1.510000e+01)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { double, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
// CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to double
// CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fabs.f64(double 1.510000e+01)
// CHECK-NEXT: [[TMP5:%.*]] = fcmp one double [[TMP4]], 0x7FF0000000000000
// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP4]]
// CHECK-NEXT: [[TMP7:%.*]] = select i1 false, double 0xFFF0000000000000, double [[TMP6]]
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP7]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_logb_d() {
  float D1 = __builtin_logb(15.1);
}
// CHECK-LABEL: define dso_local void @test_logb_var_d(
// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store double [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { double, i32 } [[TMP1]], 1
// CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], -1
// CHECK-NEXT: [[TMP4:%.*]] = sitofp i32 [[TMP3]] to double
// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fabs.f64(double [[TMP5]])
// CHECK-NEXT: [[TMP7:%.*]] = fcmp one double [[TMP6]], 0x7FF0000000000000
// CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], double [[TMP4]], double [[TMP6]]
// CHECK-NEXT: [[TMP9:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
// CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0xFFF0000000000000, double [[TMP8]]
// CHECK-NEXT: store double [[TMP10]], ptr [[D1_ASCAST]], align 8
// CHECK-NEXT: ret void
//
void test_logb_var_d(double a) {
  double D1 = __builtin_logb(a);
}

// CHECK-LABEL: define dso_local void @test_scalbn(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.ldexp.f64.i32(double 0x4030B33340000000, i32 10)
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP0]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn() {
  float D1 = __builtin_scalbn(16.7f, 10);
}
// CHECK-LABEL: define dso_local void @test_scalbn_var1(
// CHECK-SAME: float noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store float [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ldexp.f64.i32(double [[CONV]], i32 9)
// CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[TMP1]] to float
// CHECK-NEXT: store float [[CONV1]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_var1(float a) {
  float D1 = __builtin_scalbn(a, 9);
}
// CHECK-LABEL: define dso_local void @test_scalbn_var2(
// CHECK-SAME: i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ldexp.f64.i32(double 0x402E666660000000, i32 [[TMP0]])
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP1]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_var2(int b) {
  float D1 = __builtin_scalbn(15.2f, b);
}
// CHECK-LABEL: define dso_local void @test_scalbn_var3(
// CHECK-SAME: float noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store float [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ldexp.f64.i32(double [[CONV]], i32 [[TMP1]])
// CHECK-NEXT: [[CONV1:%.*]] = fptrunc double [[TMP2]] to float
// CHECK-NEXT: store float [[CONV1]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_var3(float a, int b) {
  float D1 = __builtin_scalbn(a, b);
}

// CHECK-LABEL: define dso_local void @test_scalbn_d(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.ldexp.f64.i32(double 1.720000e+01, i32 10)
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP0]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_d() {
  float D1 = __builtin_scalbn(17.2, 10);
}
// CHECK-LABEL: define dso_local void @test_scalbn_var1_d(
// CHECK-SAME: double noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store double [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP0]], i32 9)
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP1]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_var1_d(double a) {
  float D1 = __builtin_scalbn(a, 9);
}
// CHECK-LABEL: define dso_local void @test_scalbn_var2_d(
// CHECK-SAME: i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.ldexp.f64.i32(double 1.540000e+01, i32 [[TMP0]])
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP1]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_var2_d(int b) {
  float D1 = __builtin_scalbn(15.4, b);
}
// CHECK-LABEL: define dso_local void @test_scalbn_var3_d(
// CHECK-SAME: double noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8, addrspace(5)
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[D1:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// CHECK-NEXT: [[D1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D1]] to ptr
// CHECK-NEXT: store double [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[A_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP0]], i32 [[TMP1]])
// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[TMP2]] to float
// CHECK-NEXT: store float [[CONV]], ptr [[D1_ASCAST]], align 4
// CHECK-NEXT: ret void
//
void test_scalbn_var3_d(double a, int b) {
  float D1 = __builtin_scalbn(a, b);
}
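The checks above were generated with utils/update_cc_test_checks.py, as the header notes. Assuming a built tree, the test can be run in isolation with lit in the usual way, for example `./bin/llvm-lit -v <src>/clang/test/CodeGen/logb_scalbn.c` from the build directory (exact paths depend on your build layout).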
