Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[clang] Introduce elementwise clz/ctz builtins #131995

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
Loading
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion 11 clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,8 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in

The integer elementwise intrinsics, including ``__builtin_elementwise_popcount``,
``__builtin_elementwise_bitreverse``, ``__builtin_elementwise_add_sat``,
``__builtin_elementwise_sub_sat`` can be called in a ``constexpr`` context.
``__builtin_elementwise_sub_sat``, ``__builtin_elementwise_clz``,
``__builtin_elementwise_ctz`` can be called in a ``constexpr`` context.

No implicit promotion of integer types takes place. The mixing of integer types
of different sizes and signs is forbidden in binary and ternary builtins.
Expand Down Expand Up @@ -847,6 +848,14 @@ of different sizes and signs is forbidden in binary and ternary builtins.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
for the comparison.
 T __builtin_elementwise_clz(T x[, T y])       return the number of leading 0 bits in the first argument. If          integer types
                                               the first argument is 0 and an optional second argument is provided,
                                               the second argument is returned. If the first argument is 0 but only
                                               one argument is provided, the result is undefined.
 T __builtin_elementwise_ctz(T x[, T y])       return the number of trailing 0 bits in the first argument. If         integer types
                                               the first argument is 0 and an optional second argument is provided,
                                               the second argument is returned. If the first argument is 0 but only
                                               one argument is provided, the result is undefined.
============================================== ====================================================================== =========================================


Expand Down
12 changes: 12 additions & 0 deletions 12 clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,18 @@ def ElementwiseSubSat : Builtin {
let Prototype = "void(...)";
}

// Builtin record for __builtin_elementwise_clz (elementwise count of leading
// zero bits). The prototype is the variadic "void(...)" and the record is
// tagged CustomTypeChecking, so the accepted argument types are not described
// by the prototype itself. Constexpr marks the builtin as usable in constant
// expressions.
def ElementwiseClz : Builtin {
let Spellings = ["__builtin_elementwise_clz"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
let Prototype = "void(...)";
}

// Builtin record for __builtin_elementwise_ctz (elementwise count of trailing
// zero bits). The prototype is the variadic "void(...)" and the record is
// tagged CustomTypeChecking, so the accepted argument types are not described
// by the prototype itself. Constexpr marks the builtin as usable in constant
// expressions.
def ElementwiseCtz : Builtin {
  let Spellings = ["__builtin_elementwise_ctz"];
  let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
  let Prototype = "void(...)";
}

def ReduceMax : Builtin {
let Spellings = ["__builtin_reduce_max"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
Expand Down
55 changes: 52 additions & 3 deletions 55 clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11530,6 +11530,49 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case Builtin::BI__builtin_elementwise_clz:
case Builtin::BI__builtin_elementwise_ctz: {
  // Constant-evaluates the vector forms of __builtin_elementwise_clz/ctz.
  // Argument 0 is the source vector. An optional argument 1 supplies, lane by
  // lane, the value to produce wherever the source lane is zero.
  APValue Source;
  if (!EvaluateAsRValue(Info, E->getArg(0), Source))
    return false;

  std::optional<APValue> Fallback;
  if (E->getNumArgs() > 1) {
    APValue FallbackVec;
    if (!EvaluateAsRValue(Info, E->getArg(1), FallbackVec))
      return false;
    Fallback = FallbackVec;
  }

  // Properties of the result element type are the same for every lane, so
  // compute them once up front.
  QualType EltTy = E->getType()->castAs<VectorType>()->getElementType();
  const unsigned EltBits = Info.Ctx.getIntWidth(EltTy);
  const bool EltUnsigned = EltTy->isUnsignedIntegerOrEnumerationType();
  const bool CountLeading =
      E->getBuiltinCallee() == Builtin::BI__builtin_elementwise_clz;

  const unsigned NumElts = Source.getVectorLength();
  SmallVector<APValue, 4> Results;
  Results.reserve(NumElts);

  for (unsigned I = 0; I != NumElts; ++I) {
    APSInt Elt = Source.getVectorElt(I).getInt();
    if (!Elt) {
      // A zero lane has no defined count: take the matching fallback lane if
      // a fallback was supplied, otherwise give up on constant evaluation.
      if (!Fallback)
        return false;
      Results.push_back(Fallback->getVectorElt(I));
      continue;
    }

    const unsigned Count = CountLeading ? Elt.countl_zero() : Elt.countr_zero();
    Results.push_back(APValue(APSInt(APInt(EltBits, Count), EltUnsigned)));
  }

  return Success(APValue(Results.data(), Results.size()), E);
}
}
}

Expand Down Expand Up @@ -13081,6 +13124,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_clzll:
case Builtin::BI__builtin_clzs:
case Builtin::BI__builtin_clzg:
case Builtin::BI__builtin_elementwise_clz:
case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes
case Builtin::BI__lzcnt:
case Builtin::BI__lzcnt64: {
Expand All @@ -13089,7 +13133,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return false;

std::optional<APSInt> Fallback;
if (BuiltinOp == Builtin::BI__builtin_clzg && E->getNumArgs() > 1) {
if ((BuiltinOp == Builtin::BI__builtin_clzg ||
BuiltinOp == Builtin::BI__builtin_elementwise_clz) &&
E->getNumArgs() > 1) {
APSInt FallbackTemp;
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
return false;
Expand Down Expand Up @@ -13161,13 +13207,16 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_ctzl:
case Builtin::BI__builtin_ctzll:
case Builtin::BI__builtin_ctzs:
case Builtin::BI__builtin_ctzg: {
case Builtin::BI__builtin_ctzg:
case Builtin::BI__builtin_elementwise_ctz: {
APSInt Val;
if (!EvaluateInteger(E->getArg(0), Val, Info))
return false;

std::optional<APSInt> Fallback;
if (BuiltinOp == Builtin::BI__builtin_ctzg && E->getNumArgs() > 1) {
if ((BuiltinOp == Builtin::BI__builtin_ctzg ||
BuiltinOp == Builtin::BI__builtin_elementwise_ctz) &&
E->getNumArgs() > 1) {
APSInt FallbackTemp;
if (!EvaluateInteger(E->getArg(1), FallbackTemp, Info))
return false;
Expand Down
18 changes: 12 additions & 6 deletions 18 clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3298,9 +3298,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_ctz:
case Builtin::BI__builtin_ctzl:
case Builtin::BI__builtin_ctzll:
case Builtin::BI__builtin_ctzg: {
bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
E->getNumArgs() > 1;
case Builtin::BI__builtin_ctzg:
case Builtin::BI__builtin_elementwise_ctz: {
bool HasFallback =
(BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg ||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_ctz) &&
E->getNumArgs() > 1;

Value *ArgValue =
HasFallback ? EmitScalarExpr(E->getArg(0))
Expand Down Expand Up @@ -3330,9 +3333,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_clz:
case Builtin::BI__builtin_clzl:
case Builtin::BI__builtin_clzll:
case Builtin::BI__builtin_clzg: {
bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
E->getNumArgs() > 1;
case Builtin::BI__builtin_clzg:
case Builtin::BI__builtin_elementwise_clz: {
bool HasFallback =
(BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg ||
BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_clz) &&
E->getNumArgs() > 1;

Value *ArgValue =
HasFallback ? EmitScalarExpr(E->getArg(0))
Expand Down
13 changes: 13 additions & 0 deletions 13 clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2918,6 +2918,19 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
TheCall->setType(Magnitude.get()->getType());
break;
}
case Builtin::BI__builtin_elementwise_clz:
case Builtin::BI__builtin_elementwise_ctz: {
  // These builtins accept either one or two arguments. A call with no
  // arguments is deliberately routed through the one-argument checker so the
  // resulting error does not claim that two arguments are expected, which
  // would be misleading.
  const bool IsBinaryForm = TheCall->getNumArgs() > 1;
  const bool Invalid =
      IsBinaryForm
          ? BuiltinElementwiseMath(TheCall,
                                   EltwiseBuiltinArgTyRestriction::IntegerTy)
          : PrepareBuiltinElementwiseMathOneArgCall(
                TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy);
  if (Invalid)
    return ExprError();
  break;
}
case Builtin::BI__builtin_reduce_max:
case Builtin::BI__builtin_reduce_min: {
if (PrepareBuiltinReduceMathOneArgCall(TheCall))
Expand Down
96 changes: 96 additions & 0 deletions 96 clang/test/CodeGen/builtins-elementwise-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -1176,3 +1176,99 @@ void test_builtin_elementwise_fma(float f32, double f64,
half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);

}

// Verifies the IR emitted for __builtin_elementwise_clz on vector, scalar and
// _BitInt operands. The one-argument form lowers to a single llvm.ctlz call;
// the two-argument form additionally compares the input against zero and
// selects the fallback operand for zero inputs.
void test_builtin_elementwise_clz(si8 vs1, si8 vs2, u4 vu1,
                                  long long int lli, short si,
                                  _BitInt(31) bi, int i,
                                  char ci) {
  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
  // CHECK-NEXT: call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
  vs1 = __builtin_elementwise_clz(vs1);

  // Vector form with a fallback: the fallback vector is the value chosen when
  // the zero test is true, mirroring the scalar _BitInt case below.
  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
  // CHECK-NEXT: [[CLZ:%.+]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[V8S1]], i1 true)
  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
  // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
  // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[CLZ]]
  vs1 = __builtin_elementwise_clz(vs1, vs2);

  // CHECK:      [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
  // CHECK-NEXT: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[V4U1]], i1 true)
  vu1 = __builtin_elementwise_clz(vu1);

  // CHECK:      [[LLI:%.+]] = load i64, ptr %lli.addr
  // CHECK-NEXT: call i64 @llvm.ctlz.i64(i64 [[LLI]], i1 true)
  lli = __builtin_elementwise_clz(lli);

  // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr
  // CHECK-NEXT: call i16 @llvm.ctlz.i16(i16 [[SI]], i1 true)
  si = __builtin_elementwise_clz(si);

  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
  bi = __builtin_elementwise_clz(bi);

  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: [[CLZ:%.+]] = call i31 @llvm.ctlz.i31(i31 [[BI2]], i1 true)
  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
  // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[CLZ]]
  bi = __builtin_elementwise_clz(bi, (_BitInt(31))1);

  // CHECK:      [[I:%.+]] = load i32, ptr %i.addr
  // CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 [[I]], i1 true)
  i = __builtin_elementwise_clz(i);

  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr
  // CHECK-NEXT: call i8 @llvm.ctlz.i8(i8 [[CI]], i1 true)
  ci = __builtin_elementwise_clz(ci);
}

// Verifies the IR emitted for __builtin_elementwise_ctz on vector, scalar and
// _BitInt operands. The one-argument form lowers to a single llvm.cttz call;
// the two-argument form additionally compares the input against zero and
// selects the fallback operand for zero inputs.
void test_builtin_elementwise_ctz(si8 vs1, si8 vs2, u4 vu1,
                                  long long int lli, short si,
                                  _BitInt(31) bi, int i,
                                  char ci) {
  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
  // CHECK-NEXT: call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
  vs1 = __builtin_elementwise_ctz(vs1);

  // Vector form with a fallback: the fallback vector is the value chosen when
  // the zero test is true, mirroring the scalar _BitInt case below.
  // CHECK:      [[V8S1:%.+]] = load <8 x i16>, ptr %vs1.addr
  // CHECK-NEXT: [[ctz:%.+]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[V8S1]], i1 true)
  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq <8 x i16> [[V8S1]], zeroinitializer
  // CHECK-NEXT: [[V8S2:%.+]] = load <8 x i16>, ptr %vs2.addr
  // CHECK-NEXT: select <8 x i1> [[ISZERO]], <8 x i16> [[V8S2]], <8 x i16> [[ctz]]
  vs1 = __builtin_elementwise_ctz(vs1, vs2);

  // CHECK:      [[V4U1:%.+]] = load <4 x i32>, ptr %vu1.addr
  // CHECK-NEXT: call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[V4U1]], i1 true)
  vu1 = __builtin_elementwise_ctz(vu1);

  // CHECK:      [[LLI:%.+]] = load i64, ptr %lli.addr
  // CHECK-NEXT: call i64 @llvm.cttz.i64(i64 [[LLI]], i1 true)
  lli = __builtin_elementwise_ctz(lli);

  // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr
  // CHECK-NEXT: call i16 @llvm.cttz.i16(i16 [[SI]], i1 true)
  si = __builtin_elementwise_ctz(si);

  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
  bi = __builtin_elementwise_ctz(bi);

  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi.addr
  // CHECK-NEXT: [[BI2:%.+]] = trunc i32 [[BI1]] to i31
  // CHECK-NEXT: [[ctz:%.+]] = call i31 @llvm.cttz.i31(i31 [[BI2]], i1 true)
  // CHECK-NEXT: [[ISZERO:%.+]] = icmp eq i31 [[BI2]], 0
  // CHECK-NEXT: select i1 [[ISZERO]], i31 1, i31 [[ctz]]
  bi = __builtin_elementwise_ctz(bi, (_BitInt(31))1);

  // CHECK:      [[I:%.+]] = load i32, ptr %i.addr
  // CHECK-NEXT: call i32 @llvm.cttz.i32(i32 [[I]], i1 true)
  i = __builtin_elementwise_ctz(i);

  // CHECK:      [[CI:%.+]] = load i8, ptr %ci.addr
  // CHECK-NEXT: call i8 @llvm.cttz.i8(i8 [[CI]], i1 true)
  ci = __builtin_elementwise_ctz(ci);
}
44 changes: 44 additions & 0 deletions 44 clang/test/Sema/builtins-elementwise-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -1202,3 +1202,47 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16,
c3 = __builtin_elementwise_fma(f32, f32, c3);
// expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}}
}

// Negative Sema coverage for __builtin_elementwise_clz: each call below is
// ill-formed and is followed by the diagnostic it must produce. Covered cases:
// non-integer operands (double, _Complex float), operands of differing types
// (vector vs. scalar int, vector vs. double), and wrong arity (0 and 3
// arguments). The diagnostic comments are line-relative, so keep each one
// directly below its call.
void test_builtin_elementwise_clz(int i32, int2 v2i32, short i16,
double f64, double2 v2f64) {
f64 = __builtin_elementwise_clz(f64);
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}

_Complex float c1;
c1 = __builtin_elementwise_clz(c1);
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}

v2i32 = __builtin_elementwise_clz(v2i32, i32);
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}

v2i32 = __builtin_elementwise_clz(v2i32, f64);
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}

// An empty call is reported against the one-argument form.
v2i32 = __builtin_elementwise_clz();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}

v2i32 = __builtin_elementwise_clz(v2i32, v2i32, f64);
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
}

// Negative Sema coverage for __builtin_elementwise_ctz: each call below is
// ill-formed and is followed by the diagnostic it must produce. Covered cases:
// non-integer operands (double, _Complex float), operands of differing types
// (vector vs. scalar int, vector vs. double), and wrong arity (0 and 3
// arguments). The diagnostic comments are line-relative, so keep each one
// directly below its call.
void test_builtin_elementwise_ctz(int i32, int2 v2i32, short i16,
double f64, double2 v2f64) {
f64 = __builtin_elementwise_ctz(f64);
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'double')}}

_Complex float c1;
c1 = __builtin_elementwise_ctz(c1);
// expected-error@-1 {{1st argument must be a scalar or vector of integer types (was '_Complex float')}}

v2i32 = __builtin_elementwise_ctz(v2i32, i32);
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int')}}

v2i32 = __builtin_elementwise_ctz(v2i32, f64);
// expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'double')}}

// An empty call is reported against the one-argument form.
v2i32 = __builtin_elementwise_ctz();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}

v2i32 = __builtin_elementwise_ctz(v2i32, v2i32, f64);
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
}
34 changes: 34 additions & 0 deletions 34 clang/test/Sema/constant_builtins_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -853,3 +853,37 @@ static_assert(__builtin_elementwise_sub_sat((1 << 31), 42) == (1 << 31));
static_assert(__builtin_elementwise_sub_sat(0U, 1U) == 0U);
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_sub_sat((vector4char){5, 4, 3, 2}, (vector4char){1, 1, 1, 1})) == (LITTLE_END ? 0x01020304 : 0x04030201));
static_assert(__builtin_bit_cast(unsigned long long, __builtin_elementwise_sub_sat((vector4short){(short)0x8000, (short)0x8001, (short)0x8002, (short)0x8003}, (vector4short){7, 8, 9, 10}) == (LITTLE_END ? 0x8000800080008000 : 0x8000800080008000)));

// Constant evaluation of __builtin_elementwise_clz.
// Scalar cases: the count depends on the operand's width (int, short, char),
// and a second argument is returned only when the first argument is zero.
static_assert(__builtin_elementwise_clz(2) == 30);
static_assert(__builtin_elementwise_clz(2, 8) == 30);
static_assert(__builtin_elementwise_clz(0, 8) == 8);
static_assert(__builtin_elementwise_clz((char)2) == 6);
static_assert(__builtin_elementwise_clz((short)2) == 14);
static_assert(__builtin_elementwise_clz((char)1) == 0x7);
static_assert(__builtin_elementwise_clz((char)4) == 0x5);
static_assert(__builtin_elementwise_clz((char)127) == 0x1);
static_assert(__builtin_elementwise_clz((char)128) == 0x0);
// Vector case: per-lane results {7, 5, 1, 0} packed into one unsigned value.
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 4, 127, (char)128})) == (LITTLE_END ? 0x00010507 : 0x07050100));

// A zero input (scalar, or any vector lane) without a fallback argument is not
// a constant expression.
constexpr int clz0 = __builtin_elementwise_clz(0);
// expected-error@-1 {{must be initialized by a constant expression}}
constexpr vector4char clz1 = __builtin_elementwise_clz((vector4char){1, 0, 3, 4});
// expected-error@-1 {{must be initialized by a constant expression}}
// With a fallback vector, zero lanes (1 and 3) take the fallback lanes -1 and
// -2 (bytes 0xFF and 0xFE); non-zero lanes still yield their count.
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_clz((vector4char){1, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE01FF07 : 0x07FF01FE));

// Constant evaluation of __builtin_elementwise_ctz.
// Scalar cases: the trailing-zero count is the same across the operand widths
// exercised here, and a second argument is returned only when the first
// argument is zero.
static_assert(__builtin_elementwise_ctz(2) == 1);
static_assert(__builtin_elementwise_ctz(2, 8) == 1);
static_assert(__builtin_elementwise_ctz(0, 8) == 8);
static_assert(__builtin_elementwise_ctz((char)2) == 1);
static_assert(__builtin_elementwise_ctz((short)2) == 1);
static_assert(__builtin_elementwise_ctz((char)8) == 0x3);
static_assert(__builtin_elementwise_ctz((char)32) == 0x5);
static_assert(__builtin_elementwise_ctz((char)127) == 0x0);
static_assert(__builtin_elementwise_ctz((char)128) == 0x7);
// Vector case: per-lane results {3, 5, 0, 7} packed into one unsigned value.
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 32, 127, (char)128})) == (LITTLE_END ? 0x07000503 : 0x03050007));

// A zero input (scalar, or any vector lane) without a fallback argument is not
// a constant expression.
constexpr int ctz0 = __builtin_elementwise_ctz(0);
// expected-error@-1 {{must be initialized by a constant expression}}
constexpr vector4char ctz1 = __builtin_elementwise_ctz((vector4char){1, 0, 3, 4});
// expected-error@-1 {{must be initialized by a constant expression}}
// With a fallback vector, zero lanes (1 and 3) take the fallback lanes -1 and
// -2 (bytes 0xFF and 0xFE); non-zero lanes still yield their count.
static_assert(__builtin_bit_cast(unsigned, __builtin_elementwise_ctz((vector4char){8, 0, 127, 0}, (vector4char){9, -1, 9, -2})) == (LITTLE_END ? 0xFE00FF03 : 0x03FF00FE));
Morty Proxy This is a proxified and sanitized view of the page, visit original site.