From 5681859e308283628da481c0ddc09a39345b3d46 Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Tue, 15 Apr 2025 18:00:01 +0530 Subject: [PATCH] [clang] Redefine `noconvergent` and generate convergence control tokens This introduces the `-fconvergence-control` flag that emits convergence control intrinsics which are then used as the `convergencectrl` operand bundle on convergent calls. This also redefines the `noconvergent` attribute in Clang. The existing simple interpretation is that if a statement is marked `noconvergent`, then every asm call is treated as a non-convergent operation in the emitted LLVM IR. The new semantics introduces a more powerful notion that a `noconvergent` statement may contain convergent operations, but the resulting convergence constraints are limited to the scope of that statement. As a whole the statement itself does not place any convergence constraints on the control flow reaching it. When emitting convergence tokens, this attribute results in a call to the `anchor` intrinsic that determines convergence within the statement. --- clang/docs/ThreadConvergence.rst | 27 + .../Analysis/Analyses/ConvergenceCheck.h | 3 +- clang/include/clang/Basic/AttrDocs.td | 15 +- .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Driver/Options.td | 5 + clang/lib/Analysis/ConvergenceCheck.cpp | 43 +- clang/lib/CodeGen/CGCall.cpp | 8 +- clang/lib/CodeGen/CGStmt.cpp | 44 +- clang/lib/CodeGen/CodeGenFunction.cpp | 23 +- clang/lib/CodeGen/CodeGenFunction.h | 13 +- clang/lib/CodeGen/CodeGenModule.h | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 3 + clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 +- clang/test/CodeGenHIP/convergence-tokens.hip | 687 ++++++++++++++++++ .../CodeGenHIP/noconvergent-statement.hip | 109 +++ .../noconvergent-errors/backwards_jump.hip | 23 + .../noconvergent-errors/jump-into-nest.hip | 32 + .../SemaHIP/noconvergent-errors/no-errors.hip | 83 +++ .../noconvergent-errors/simple_jump.hip | 23 + llvm/include/llvm/IR/InstrTypes.h | 8 +- llvm/include/llvm/IR/IntrinsicInst.h | 12 + .../Transforms/Utils/FixConvergenceControl.h | 21 + llvm/lib/IR/Instructions.cpp | 7 + llvm/lib/IR/IntrinsicInst.cpp | 21 + llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + .../Utils/FixConvergenceControl.cpp | 191 +++++ 27 files changed, 1365 insertions(+), 51 deletions(-) create mode 100644 clang/test/CodeGenHIP/convergence-tokens.hip create mode 100644 clang/test/CodeGenHIP/noconvergent-statement.hip create mode 100644 clang/test/SemaHIP/noconvergent-errors/backwards_jump.hip create mode 100644 clang/test/SemaHIP/noconvergent-errors/jump-into-nest.hip create mode 100644 clang/test/SemaHIP/noconvergent-errors/no-errors.hip create mode 100644 clang/test/SemaHIP/noconvergent-errors/simple_jump.hip create mode 100644 llvm/include/llvm/Transforms/Utils/FixConvergenceControl.h create mode 100644 llvm/lib/Transforms/Utils/FixConvergenceControl.cpp diff --git a/clang/docs/ThreadConvergence.rst b/clang/docs/ThreadConvergence.rst index d872ab9cb77f5..ce2ca2cbeacde 100644 --- a/clang/docs/ThreadConvergence.rst +++ b/clang/docs/ThreadConvergence.rst @@ -564,6 +564,33 @@ backwards ``goto`` instead of a ``while`` statement. ``outside_loop``. This includes threads that jumped from ``G2`` as well as threads that reached ``outside_loop`` after executing ``C``. +.. _noconvergent-statement: + +The ``noconvergent`` Statement +============================== + +When a statement is marked as ``noconvergent`` the convergence of threads at the +start of this statement is not constrained by any convergent operations inside +the statement. + +- When two threads execute a statement marked ``noconvergent``, it is + implementation-defined whether they are converged at that execution. [Note: + The resulting evaluations must still satisfy the strict partial order imposed + by convergence-before.] +- When two threads are converged at the start of this statement (as determined + by the implementation), whether they are converged at each convergent + operation inside this statement is determined by the usual rules. + +For every label statement ``L`` occurring inside a ``noconvergent`` +statement, every ``goto`` or ``switch`` statement that transfers control to +``L`` must also occur inside that statement. + +.. note:: + + Convergence control tokens are necessary for correctly implementing the + "noconvergent" statement attribute. When tokens are not in use, the legacy + behaviour is retained, where the only effect of this attribute is that + ``asm`` calls within the statement are not treated as convergent operations. Implementation-defined Convergence ================================== diff --git a/clang/include/clang/Analysis/Analyses/ConvergenceCheck.h b/clang/include/clang/Analysis/Analyses/ConvergenceCheck.h index bf0d164c6a5bc..74208889a84df 100644 --- a/clang/include/clang/Analysis/Analyses/ConvergenceCheck.h +++ b/clang/include/clang/Analysis/Analyses/ConvergenceCheck.h @@ -18,7 +18,8 @@ class AnalysisDeclContext; class Sema; class Stmt; -void analyzeForConvergence(Sema &S, AnalysisDeclContext &AC); +void analyzeForConvergence(Sema &S, AnalysisDeclContext &AC, + bool GenerateWarnings, bool GenerateTokens); } // end namespace clang diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5f37922d352b7..7ef8d3d86fe50 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1700,13 +1700,12 @@ def NoConvergentDocs : Documentation { This attribute prevents a function from being treated as convergent; when a function is marked ``noconvergent``, calls to that function are not automatically assumed to be convergent, unless such calls are explicitly marked -as ``convergent``. If a statement is marked as ``noconvergent``, any calls to -inline ``asm`` in that statement are no longer treated as convergent. +as ``convergent``. -In languages following SPMD/SIMT programming model, e.g., CUDA/HIP, function -declarations and inline asm calls are treated as convergent by default for -correctness. This ``noconvergent`` attribute is helpful for developers to -prevent them from being treated as convergent when it's safe. +If a statement is marked as ``noconvergent``, the semantics depends on whether +convergence control tokens are used in the generated LLVM IR. When convergence +control tokens are not in use, any calls to inline ``asm`` in that statement are +treated as not convergent. .. code-block:: c @@ -1719,6 +1718,10 @@ prevent them from being treated as convergent when it's safe. [[clang::noconvergent]] { asm volatile ("nop"); } // the asm call is non-convergent } +When tokens are in use, placing the ``noconvergent`` attribute on a statement +indicates that thread convergence at the entry to that statement is +:ref:`implementation-defined`. + }]; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index dabb6d31b519a..3be697c6337bc 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6514,6 +6514,8 @@ def note_goto_affects_convergence : Note< "jump from this goto statement affects convergence">; def note_switch_case_affects_convergence : Note< "jump to this case statement affects convergence of loop">; +def err_jump_into_noconvergent : Error< + "cannot jump into a noconvergent statement from outside">; def err_goto_into_protected_scope : Error< "cannot jump from this goto statement to its label">; def ext_goto_into_protected_scope : ExtWarn< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 930c1c06d1a76..c8254af61387b 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -306,6 +306,8 @@ LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)") LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)") LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)") +LANGOPT(ConvergenceControl, 1, 0, + "Generate explicit convergence control (experimental)") LANGOPT(OpenACC , 1, 0, "OpenACC Enabled") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 830d3459a1320..369929c30a623 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1397,6 +1397,11 @@ def fhip_emit_relocatable : Flag<["-"], "fhip-emit-relocatable">, HelpText<"Compile HIP source to relocatable">; def fno_hip_emit_relocatable : Flag<["-"], "fno-hip-emit-relocatable">, HelpText<"Do not override toolchain to compile HIP source to relocatable">; +defm convergence_control : BoolFOption<"convergence-control", + LangOpts<"ConvergenceControl">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[], [ClangOption], " explicit convergence control tokens (experimental)">>; } // Clang specific/exclusive options for OpenACC. diff --git a/clang/lib/Analysis/ConvergenceCheck.cpp b/clang/lib/Analysis/ConvergenceCheck.cpp index 75139388ea19e..93744f8b8e495 100644 --- a/clang/lib/Analysis/ConvergenceCheck.cpp +++ b/clang/lib/Analysis/ConvergenceCheck.cpp @@ -16,6 +16,11 @@ using namespace clang; using namespace llvm; +static void errorJumpIntoNoConvergent(Sema &S, Stmt *From, Stmt *Parent) { + S.Diag(Parent->getBeginLoc(), diag::err_jump_into_noconvergent); + S.Diag(From->getBeginLoc(), diag::note_goto_affects_convergence); +} + static void warnGotoCycle(Sema &S, Stmt *From, Stmt *Parent) { S.Diag(Parent->getBeginLoc(), diag::warn_cycle_created_by_goto_affects_convergence); @@ -27,7 +32,8 @@ static void warnJumpIntoLoop(Sema &S, Stmt *From, Stmt *Loop) { S.Diag(From->getBeginLoc(), diag::note_goto_affects_convergence); } -static void checkConvergenceOnGoto(Sema &S, GotoStmt *From, ParentMap &PM) { +static void checkConvergenceOnGoto(Sema &S, GotoStmt *From, ParentMap &PM, + bool GenerateWarnings, bool GenerateTokens) { Stmt *To = From->getLabel()->getStmt(); unsigned ToDepth = PM.getParentDepth(To) + 1; @@ -42,7 +48,7 @@ static void checkConvergenceOnGoto(Sema &S, GotoStmt *From, ParentMap &PM) { } // Special case: the goto statement is a descendant of the label statement. - if (ExpandedFrom == ExpandedTo) { + if (GenerateWarnings && ExpandedFrom == ExpandedTo) { assert(ExpandedTo == To); warnGotoCycle(S, From, To); return; @@ -60,10 +66,18 @@ static void checkConvergenceOnGoto(Sema &S, GotoStmt *From, ParentMap &PM) { SmallVector Loops; for (Stmt *I = To; I != ParentFrom; I = PM.getParent(I)) { + if (GenerateTokens) + if (const auto *AS = dyn_cast(I)) + if (hasSpecificAttr(AS->getAttrs())) + errorJumpIntoNoConvergent(S, From, I); // Can't jump into a ranged-for, so we don't need to look for it here. - if (isa(I)) + if (GenerateWarnings && isa(I)) Loops.push_back(I); } + + if (!GenerateWarnings) + return; + for (Stmt *I : reverse(Loops)) warnJumpIntoLoop(S, From, I); @@ -88,21 +102,29 @@ static void warnSwitchIntoLoop(Sema &S, Stmt *Case, Stmt *Loop) { } static void checkConvergenceForSwitch(Sema &S, SwitchStmt *Switch, - ParentMap &PM) { + ParentMap &PM, bool GenerateWarnings, + bool GenerateTokens) { for (SwitchCase *Case = Switch->getSwitchCaseList(); Case; Case = Case->getNextSwitchCase()) { SmallVector Loops; for (Stmt *I = Case; I != Switch; I = PM.getParent(I)) { + if (GenerateTokens) + if (const auto *AS = dyn_cast(I)) + if (hasSpecificAttr(AS->getAttrs())) + errorJumpIntoNoConvergent(S, Switch, I); // Can't jump into a ranged-for, so we don't need to look for it here. - if (isa(I)) + if (GenerateWarnings && isa(I)) Loops.push_back(I); } - for (Stmt *I : reverse(Loops)) - warnSwitchIntoLoop(S, Case, I); + if (GenerateWarnings) { + for (Stmt *I : reverse(Loops)) + warnSwitchIntoLoop(S, Case, I); + } } } -void clang::analyzeForConvergence(Sema &S, AnalysisDeclContext &AC) { +void clang::analyzeForConvergence(Sema &S, AnalysisDeclContext &AC, + bool GenerateWarnings, bool GenerateTokens) { // Iterating over the CFG helps trim unreachable blocks, and locates Goto // statements faster than iterating over the whole body. CFG *cfg = AC.getCFG(); @@ -111,9 +133,10 @@ void clang::analyzeForConvergence(Sema &S, AnalysisDeclContext &AC) { for (CFGBlock *BI : *cfg) { Stmt *Term = BI->getTerminatorStmt(); if (GotoStmt *Goto = dyn_cast_or_null(Term)) { - checkConvergenceOnGoto(S, Goto, PM); + checkConvergenceOnGoto(S, Goto, PM, GenerateWarnings, GenerateTokens); } else if (SwitchStmt *Switch = dyn_cast_or_null(Term)) { - checkConvergenceForSwitch(S, Switch, PM); + checkConvergenceForSwitch(S, Switch, PM, GenerateWarnings, + GenerateTokens); } } } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8cb27420dd911..20f251a5ba5b2 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5773,7 +5773,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); - // Remove call-site convergent attribute if requested. + // Remove call-site convergent attribute if this call occurs inside a + // noconvergent statement. This is the legacy behaviour when convergence + // control tokens are not in use. It only affects inline asm calls, since all + // other function calls inherit the convergent attribute from the callee. When + // convergence control tokens are in use, any inline asm calls should be + // explicitly marked noconvergent, else they simply inherit whatever token is + // currently in scope. if (InNoConvergentAttributedStmt) Attrs = Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Convergent); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 3562b4ea22a24..1a9a574572f67 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -829,14 +829,24 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { } break; } } + bool LegacyNoConvergent = noconvergent && !CGM.shouldEmitConvergenceTokens(); SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge); SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline); SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline); - SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, noconvergent); + SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, + LegacyNoConvergent); SaveAndRestore save_musttail(MustTailCall, musttail); SaveAndRestore save_flattenOrBranch(HLSLControlFlowAttr, flattenOrBranch); CGAtomicOptionsRAII AORAII(CGM, AA); + if (noconvergent && CGM.shouldEmitConvergenceTokens()) { + EmitBlock(createBasicBlock("noconvergent.anchor")); + ConvergenceTokenStack.push_back( + emitConvergenceAnchorToken(Builder.GetInsertBlock())); + } EmitStmt(S.getSubStmt(), S.getAttrs()); + if (noconvergent && CGM.shouldEmitConvergenceTokens()) { + ConvergenceTokenStack.pop_back(); + } } void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { @@ -3317,16 +3327,6 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { return F; } -// Returns the first convergence entry/loop/anchor instruction found in |BB|. -// std::nullptr otherwise. -static llvm::ConvergenceControlInst *getConvergenceToken(llvm::BasicBlock *BB) { - for (auto &I : *BB) { - if (auto *CI = dyn_cast(&I)) - return CI; - } - return nullptr; -} - llvm::CallBase * CodeGenFunction::addConvergenceControlToken(llvm::CallBase *Input) { llvm::ConvergenceControlInst *ParentToken = ConvergenceTokenStack.back(); @@ -3348,15 +3348,33 @@ CodeGenFunction::emitConvergenceLoopToken(llvm::BasicBlock *BB) { return llvm::ConvergenceControlInst::CreateLoop(*BB, ParentToken); } +llvm::ConvergenceControlInst * +CodeGenFunction::emitConvergenceAnchorToken(llvm::BasicBlock *BB) { + return llvm::ConvergenceControlInst::CreateAnchor(*BB); +} + llvm::ConvergenceControlInst * CodeGenFunction::getOrEmitConvergenceEntryToken(llvm::Function *F) { llvm::BasicBlock *BB = &F->getEntryBlock(); - llvm::ConvergenceControlInst *Token = getConvergenceToken(BB); + llvm::ConvergenceControlInst *Token = llvm::getConvergenceControlDef(*BB); if (Token) return Token; - // Adding a convergence token requires the function to be marked as + // Adding a convergence entry token requires the function to be marked as // convergent. F->setConvergent(); return llvm::ConvergenceControlInst::CreateEntry(*BB); } + +llvm::ConvergenceControlInst * +CodeGenFunction::getOrEmitConvergenceAnchorToken(llvm::Function *F) { + llvm::BasicBlock *BB = &F->getEntryBlock(); + llvm::ConvergenceControlInst *Token = llvm::getConvergenceControlDef(*BB); + if (Token) + return Token; + + // Adding a convergence anchor token requires the function to be marked as + // not convergent. + F->setNotConvergent(); + return llvm::ConvergenceControlInst::CreateAnchor(*BB); +} diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 4d29ceace646f..d9226bdd775a3 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -47,6 +47,7 @@ #include "llvm/Support/CRC.h" #include "llvm/Support/xxhash.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Utils/FixConvergenceControl.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include @@ -371,12 +372,6 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { assert(DeferredDeactivationCleanupStack.empty() && "mismatched activate/deactivate of cleanups!"); - if (CGM.shouldEmitConvergenceTokens()) { - ConvergenceTokenStack.pop_back(); - assert(ConvergenceTokenStack.empty() && - "mismatched push/pop in convergence stack!"); - } - bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 && NumSimpleReturnExprs == NumReturnExprs && ReturnBlock.getBlock()->use_empty(); @@ -1362,8 +1357,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (const auto *VecWidth = CurFuncDecl->getAttr()) LargestVectorWidth = VecWidth->getVectorWidth(); - if (CGM.shouldEmitConvergenceTokens()) - ConvergenceTokenStack.push_back(getOrEmitConvergenceEntryToken(CurFn)); + if (CGM.shouldEmitConvergenceTokens()) { + llvm::ConvergenceControlInst *Token = + (FD && FD->hasAttr()) + ? getOrEmitConvergenceAnchorToken(CurFn) + : getOrEmitConvergenceEntryToken(CurFn); + ConvergenceTokenStack.push_back(Token); + } } void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { @@ -1647,6 +1647,13 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, } } + if (CGM.shouldEmitConvergenceTokens()) { + ConvergenceTokenStack.pop_back(); + assert(ConvergenceTokenStack.empty() && + "mismatched push/pop in convergence stack!"); + fixConvergenceControl(CurFn); + } + // Emit the standard function epilogue. FinishFunction(BodyRange.getEnd()); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 9254c7077237f..0d20218f6cbf1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5339,15 +5339,24 @@ class CodeGenFunction : public CodeGenTypeCache { // as it's parent convergence instr. llvm::ConvergenceControlInst *emitConvergenceLoopToken(llvm::BasicBlock *BB); + // Emits a convergence_anchor instruction for the given |BB|. + llvm::ConvergenceControlInst * + emitConvergenceAnchorToken(llvm::BasicBlock *BB); + // Adds a convergence_ctrl token with |ParentToken| as parent convergence // instr to the call |Input|. llvm::CallBase *addConvergenceControlToken(llvm::CallBase *Input); - // Find the convergence_entry instruction |F|, or emits ones if none exists. - // Returns the convergence instruction. + // Find the convergence control token in the entry block of |F|, or if none + // exists, create an entry token. llvm::ConvergenceControlInst * getOrEmitConvergenceEntryToken(llvm::Function *F); + // Find the convergence control token in the entry block of |F|, or if none + // exists, create an anchor token. + llvm::ConvergenceControlInst * + getOrEmitConvergenceAnchorToken(llvm::Function *F); + private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); void EmitReturnOfRValue(RValue RV, QualType Ty); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 9a0bc675e0baa..1651c87049df8 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1751,7 +1751,7 @@ class CodeGenModule : public CodeGenTypeCache { bool shouldEmitConvergenceTokens() const { // TODO: this should probably become unconditional once the controlled // convergence becomes the norm. - return getTriple().isSPIRVLogical(); + return getTriple().isSPIRVLogical() || getLangOpts().ConvergenceControl; } void addUndefinedGlobalForTailCall( diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b2dd4b3b54869..c9e37548fa835 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7098,6 +7098,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_fhip_new_launch_api, options::OPT_fno_hip_new_launch_api, true)) CmdArgs.push_back("-fhip-new-launch-api"); + if (Args.hasFlag(options::OPT_fconvergence_control, + options::OPT_fno_convergence_control, false)) + CmdArgs.push_back("-fconvergence-control"); Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init); Args.AddLastArg(CmdArgs, options::OPT_hipstdpar); diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 31756d3a2f75a..bb9efacd74a88 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2866,9 +2866,11 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( if (S.getLangOpts().CPlusPlus && !fscope->isCoroutine() && isNoexcept(FD)) checkThrowInNonThrowingFunc(S, FD, AC); - if (!Diags.isIgnored(diag::warn_cycle_created_by_goto_affects_convergence, - D->getBeginLoc())) - analyzeForConvergence(S, AC); + bool WarnConvergence = !Diags.isIgnored( + diag::warn_cycle_created_by_goto_affects_convergence, D->getBeginLoc()); + bool GenerateTokens = S.getLangOpts().ConvergenceControl; + if (GenerateTokens || WarnConvergence) + analyzeForConvergence(S, AC, WarnConvergence, GenerateTokens); // If none of the previous checks caused a CFG build, trigger one here // for the logical error handler. diff --git a/clang/test/CodeGenHIP/convergence-tokens.hip b/clang/test/CodeGenHIP/convergence-tokens.hip new file mode 100644 index 0000000000000..f1807acc8b0d4 --- /dev/null +++ b/clang/test/CodeGenHIP/convergence-tokens.hip @@ -0,0 +1,687 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -fconvergence-control -Wno-convergence -emit-llvm -disable-llvm-optzns -fcuda-is-device -o - %s | FileCheck %s + + +// NOTE: +// ===== +// The following tests are of particular interest: +// - jump_into_unreachable_nest +// - backwards_jump_into_nest +// - forever_loops +// - backwards_inside_label +// - switch_backwards +// - backwards_conditional +// - duffs_device + +#define __device__ __attribute__((device)) + +__device__ void foo() __attribute__((convergent)); + +// CHECK-LABEL: @_Z14jump_into_nesti +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: if.end: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK2]]) ] +// CHECK: do.body: ; preds = %do.cond, %if.then2 +// CHECK: [[TOK4:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK4]]) ] +// CHECK: do.end: ; preds = %do.cond +// CHECK: [[TOK7:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK7]]) ] + +__device__ int jump_into_nest(int cond) { + int i = 0; + + if (cond > 1) { + goto jumptarget; + } + + foo(); + + while (true) { + foo(); + if (cond > 1) { + do { + i++; + foo(); + jumptarget: + i++; + } while (true); + foo(); + } + } + + return i; +} + +// CHECK-LABEL: @_Z26jump_into_unreachable_nesti +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK2]]) ] +// CHECK: do.body: ; preds = %do.cond, %if.then +// CHECK: [[TOK4:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK4]]) ] +// CHECK: do.end: ; preds = %do.cond +// CHECK: [[TOK7:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK7]]) ] + +// A function with a nest of reducible forever loops, where the entry is from +// the label ``jumptarget`` since they are unreachable with sequential control +// flow. The detected cycles are inside out ... the do-while loop is the parent +// of the while loop. What's even more weird is that the inner loop is actually +// unreachable because the condition for the do-while never branches to it. +__device__ int jump_into_unreachable_nest(int cond) { + int i = 0; + + foo(); + + goto jumptarget; + + while (true) { + foo(); + if (cond > 1) { + do { + i++; + foo(); + jumptarget: + i++; + } while (true); + foo(); + } + } + + return i; +} + +// CHECK-LABEL: @_Z24backwards_jump_into_nesti +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: while.cond: ; preds = %if.end, %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: do.body: ; preds = %do.cond, %if.then +// CHECK: [[TOK6:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK6]]) ] +// CHECK: do.end: ; preds = %do.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] + +// The outer while-loop dominates the backwards jump, and hence remains +// reducible. But the do-while loop does not dominate and hence becomes an +// irreducible cycle with two entries: the blocks %do.body and %jumptarget. +__device__ int backwards_jump_into_nest(int cond1, int cond2, int cond3, int cond4) { + int i = 0; + + while (i < cond1) { + foo(); + if (cond2 > i) { + do { + i++; + foo(); + jumptarget: + i++; + } while (cond3 > i); + foo(); + } + } + + if (cond4 > i) { + goto jumptarget; + } + + return i; +} + +// CHECK-LABEL: @_Z17forward_all_kindsiiiii +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: for.body: ; preds = %for.cond +// CHECK: [[TOK4:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK4]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: [[TOK8:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK8]]) ] +// CHECK: do.body: ; preds = %do.cond, %if.then6 +// CHECK: [[TOK10:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK10]]) ] +// CHECK: while.cond10: ; preds = %while.body12, %if.else +// CHECK: [[TOK15:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: while.body12: ; preds = %while.cond10 +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK15]]) ] + +// All loops become irreducible due to the jump into the nest. +__device__ int forward_all_kinds(int cond1, int cond2, int cond3, int cond4, int cond5) { + int i = 0; + + if (cond1 < 0) + goto jumptarget; + + for (; i < cond5;) { + foo(); + if (cond2 != 0) { + while (i < cond3) { + foo(); + if (cond4 > 1) { + do { + foo(); + i++; + jumptarget: + i++; + } while (i < cond4); + } + } + } else { + while (i < cond3) { + foo(); + i++; + } + } + } + return i; +} + +// CHECK-LABEL: @_Z13forever_loopsi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: for.cond: ; preds = %if.end +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK2]]) ] +// CHECK: jumptarget: ; preds = %while.body, %if.then +// CHECK: [[TOK3:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK3]]) ] + +// The outer for-loop is "interesting". The frontend eliminates its backedge +// effectively making it a non-loop, since control never exits the inner +// while-loop. +__device__ int forever_loops(int cond1) { + int i = 0; + + if (cond1 < 0) + goto jumptarget; + + for (;;) { + foo(); + while (true) { + jumptarget: + i++; + foo(); + } + } + + return i; +} + +// CHECK-LABEL: @_Z12nest_to_nesti +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: for.cond: ; preds = %if.end3, %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: while.cond: ; preds = %if.end, %if.then +// CHECK: [[TOK3:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK3]]) ] +// CHECK: while.body5: ; preds = %while.cond4 +// CHECK: [[TOK6:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK6]]) ] +// CHECK: do.body: ; preds = %do.cond, %if.then7 +// CHECK: [[TOK8:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK8]]) ] + +__device__ int nest_to_nest(int cond) { + int i = 0; + + for (;;) { + foo(); + if (cond != 0) { + while (true) { + foo(); + if (cond > 1) { + goto jumptarget; + } + } + } + } + + while (true) { + foo(); + if (cond > 1) { + do { + foo(); + i++; + jumptarget: + i++; + } while (true); + } + } + + return i; +} + +// CHECK-LABEL: @_Z22backwards_nest_to_nesti +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: while.cond: ; preds = %if.end, %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: do.body: ; preds = %do.cond, %if.then +// CHECK: [[TOK3:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK3]]) ] + +__device__ int backwards_nest_to_nest(int cond) { + int i = 0; + + while (true) { + foo(); + if (cond > 1) { + do { + foo(); + i++; + jumptarget: + i++; + } while (true); + } + } + + for (;;) { + foo(); + if (cond != 0) { + while (true) { + foo(); + if (cond > 1) { + goto jumptarget; + } + } + } + } + + return i; +} + +// CHECK-LABEL: @_Z17backwards_and_outi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: if.then: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: jumptarget: ; preds = %if.then4, %if.then +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK2]]) ] +// CHECK: for.cond: ; preds = %if.end6, %if.end +// CHECK: [[TOK4:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK4]]) ] +// CHECK: while.cond: ; preds = %if.end5, %if.then2 +// CHECK: [[TOK6:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK4]]) ] +// CHECK: while.body: ; preds = %while.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK6]]) ] + +__device__ int backwards_and_out(int cond) { + int i = 0; + + if (cond > 1) { + foo(); + jumptarget: + foo(); + i++; + } + + for (;;) { + foo(); + if (cond != 0) { + while (true) { + foo(); + if (cond > 1) { + goto jumptarget; + } + } + } + } + + return i; +} + +// CHECK-LABEL: @_Z21backwards_inside_loopi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: for.cond: ; preds = %if.end, %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: jumptarget: ; preds = %if.then, %for.cond +// CHECK: [[TOK3:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK3]]) ] + +__device__ int backwards_inside_loop(int cond) { + int i = 0; + int j = 0; + int k = 0; + for (;;) { + foo(); + i++; + jumptarget: + foo(); + j++; + k++; + if (cond > 5) + goto jumptarget; + } + return i + j + k; +} + +// CHECK-LABEL: @_Z19loop_backwards_loopi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: for.cond: ; preds = %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: while.cond: ; preds = %if.end, %jumptarget +// CHECK: [[TOK4:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: while.body: ; preds = %while.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK4]]) ] + +__device__ int loop_backwards_loop(int cond) { + int i = 0; + int j = 0; + int k = 0; + for (;;) { + foo(); + i++; + jumptarget: + j++; + while (true) { + foo(); + k++; + if (cond > 5) + goto jumptarget; + } + } + return i + j + k; +} + +// CHECK-LABEL: @_Z22backwards_inside_labeli +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: jumptarget: ; preds = %if.then, %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] + +// The border-case where a goto is a child of its own target label statement. +// This is actually a reducible cycle and can use the loop intrinsic if we +// strengthen the semantics of the backwards goto. +__device__ int backwards_inside_label(int cond) { + int i = 0; + int j = 0; + jumptarget: { + foo(); + i++; + j++; + if (cond > 5) + goto jumptarget; + } + return i + j; +} + +// CHECK-LABEL: @_Z29backwards_label_inside_branchi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: jumptarget: ; preds = %if.then3, %if.then +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK2]]) ] + +__device__ int backwards_label_inside_branch(int cond) { + int i = 0; + int j = 0; + if (cond > 0) { + jumptarget: { + foo(); + i++; + j++; + if (cond > 5) + goto jumptarget; + } + } else { + j++; + } + return i + j; +} + +// CHECK-LABEL: @_Z30backwards_inside_labelled_loopi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: while.cond: ; preds = %if.end, %jumptarget +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: while.body: ; preds = %while.cond +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK1]]) ] + +__device__ int backwards_inside_labelled_loop(int cond) { + int i = 0; + int j = 0; + jumptarget: while (true) { + foo(); + i++; + j++; + if (cond > 5) + goto jumptarget; + } + return i + j; +} + +// CHECK-LABEL: @_Z15switch_sidewaysi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: sw.bb: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.bb1: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.bb2: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: jumptarget: ; preds = %sw.bb2, %sw.bb1 +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.default: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] + +// Convergence is well-defined for a jump within a switch statement. +__device__ int switch_sideways(int cond) { + int i = 0; + switch (cond) { + case 10: + foo(); + i++; + break; + case 20: + foo(); + i += 2; + goto jumptarget; + break; + case 30: { + foo(); + i += 3; + jumptarget: + foo(); + i += 4; + break; + } + default: + foo(); + break; + } + return i; +} + +// CHECK-LABEL: @_Z16switch_backwardsi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: sw.bb: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.bb1: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: jumptarget: ; preds = %sw.bb3, %sw.bb1 +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.bb3: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.default: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] + +// Convergence is declared to be implementation-defined for a backward jump +// within a switch statement. But we don't actually do anything if it does not +// create a cycle. So it's perfectly legal that the convergent operations below +// use the entry token and not their own anchor token. +__device__ int switch_backwards(int cond) { + int i = 0; + switch (cond) { + case 10: + foo(); + i++; + break; + case 30: { + foo(); + i += 3; + jumptarget: + foo(); + i += 4; + break; + } + case 20: + foo(); + i += 2; + goto jumptarget; + break; + default: + foo(); + break; + } + return i; +} + +// CHECK-LABEL: @_Z18switch_fallthroughi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: sw.bb: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.bb1: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.bb2: ; preds = %entry, %sw.bb1 +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: sw.default: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] + +// Convergence is well-defined with fallthrough. +__device__ int switch_fallthrough(int cond) { + int i = 0; + switch (cond) { + case 10: + foo(); + i++; + break; + case 20: + foo(); + i += 2; + case 30: { + foo(); + i += 3; + i += 4; + break; + } + default: + foo(); + break; + } + return i; +} + +// CHECK-LABEL: @_Z19forward_conditionali +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: if.then: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: jumptarget: ; preds = %if.else, %if.then +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] + +// Convergence is well-defined for forward jumps from the then-part to the +// else-part of a conditional statement. +__device__ int forward_conditional(int cond) { + int i = 0; + + if (cond > 0) { + foo(); + i++; + goto jumptarget; + } else { + jumptarget: + foo(); + i++; + } + + return i; +} + +// CHECK-LABEL: @_Z21backwards_conditionali +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: if.then: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: if.else: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] + +// Convergence is declared to be implementation-defined for a backwards jump +// from the else-part to the then-part of a conditional statement. But just like +// the backwards jump in a switch, we don't actually do anything about it if it +// does not produce a cycle. Hence the convergent operations here use the entry +// token rather than their own anchor token. +__device__ int backwards_conditional(int cond) { + int i = 0; + + foo(); + if (cond > 0) { + foo(); + jumptarget: + i++; + } else { + foo(); + i++; + goto jumptarget; + } + + return i; +} + +// CHECK-LABEL: @_Z12duffs_devicei +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: sw.bb: ; preds = %entry +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: do.body: ; preds = %do.cond, %sw.bb +// CHECK: [[TOK3:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK3]]) ] +// CHECK: sw.bb1: ; preds = %entry, %do.body +// CHECK: [[TOK5:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #3 [ "convergencectrl"(token [[TOK5]]) ] + +// Note that the convergent operation in case 0 outside the do-while loop has +// well-defined convergence, and must use the entry token. The other operations +// must use anchor tokens. +__device__ int duffs_device(int count) { + int i = 0; + int n = (count + 7) / 8; + switch (count % 8) { + case 0: + foo(); + do { i++; + foo(); + case 7: i++; + foo(); + case 6: i++; + case 5: i++; + case 4: i++; + case 3: i++; + case 2: i++; + case 1: i++; + } while (--n > 0); + } + return i; +} diff --git a/clang/test/CodeGenHIP/noconvergent-statement.hip b/clang/test/CodeGenHIP/noconvergent-statement.hip new file mode 100644 index 0000000000000..64579ec226768 --- /dev/null +++ b/clang/test/CodeGenHIP/noconvergent-statement.hip @@ -0,0 +1,109 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -fconvergence-control -emit-llvm -disable-llvm-optzns -fcuda-is-device -o - %s | FileCheck %s + +#define __device__ __attribute__((device)) + +__device__ void foo() __attribute__((convergent)); + +__device__ bool check() __attribute__((convergent)); + +// ------ The entry of a noconvergent function uses an anchor +// CHECK-LABEL: @_Z3bari +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #4 [ "convergencectrl"(token [[TOK0]]) ] +__device__ __attribute__((noconvergent)) int bar(int x) { + foo(); + return x; +} + +// ------ No token on a call to a noconvergent function +// CHECK-LABEL: @_Z17call_noconvergenti +// CHECK: %call = call noundef i32 @_Z3bari(i32 noundef %4) # +__device__ int call_noconvergent(int cond) { + int i = 0; + + while (i < cond) { + i = bar(i); + } + + return i; +} + +// CHECK-LABEL: @_Z16branch_statementi +// CHECK: noconvergent.anchor: +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: %call1 = call noundef zeroext i1 @_Z5checkv() #4 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: if.then: +// CHECK: call void @_Z3foov() #4 [ "convergencectrl"(token [[TOK1]]) ] +__device__ int branch_statement(int cond) { + int i = 0; + + [[clang::noconvergent]] if (check()) { + foo(); + } else { + foo(); + } + + return i; +} + +// CHECK-LABEL: @_Z19branch_substatementi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: %call1 = call noundef zeroext i1 @_Z5checkv() #4 [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: noconvergent.anchor: +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #4 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: noconvergent.anchor2: +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #4 [ "convergencectrl"(token [[TOK2]]) ] +__device__ int branch_substatement(int cond) { + int i = 0; + + if (check()) [[clang::noconvergent]] { + foo(); + } else [[clang::noconvergent]] { + foo(); + } + + return i; +} + +// CHECK-LABEL: @_Z9loop_bodyi +// CHECK: entry: +// CHECK: [[TOK0:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK: while.cond: ; preds = %noconvergent.anchor, %entry +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK0]]) ] +// CHECK: %call1 = call noundef zeroext i1 @_Z5checkv() #4 [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: noconvergent.anchor: ; preds = %while.body +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: call void @_Z3foov() #4 [ "convergencectrl"(token [[TOK2]]) ] +__device__ int loop_body(int cond) { + int i = 0; + + while (check()) [[clang::noconvergent]] { + foo(); + i++; + } + + return i; +} + +// CHECK-LABEL: @_Z14loop_statementi +// CHECK: noconvergent.anchor: +// CHECK: [[TOK1:%[0-9]+]] = call token @llvm.experimental.convergence.anchor() +// CHECK: while.cond: +// CHECK: [[TOK2:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOK1]]) ] +// CHECK: %call1 = call noundef zeroext i1 @_Z5checkv() #4 [ "convergencectrl"(token [[TOK2]]) ] +// CHECK: while.body: +// CHECK: call void @_Z3foov() #4 [ "convergencectrl"(token [[TOK2]]) ] +__device__ int loop_statement(int cond) { + int i = 0; + + [[clang::noconvergent]] while (check()) { + foo(); + i++; + } + + return i; +} diff --git a/clang/test/SemaHIP/noconvergent-errors/backwards_jump.hip b/clang/test/SemaHIP/noconvergent-errors/backwards_jump.hip new file mode 100644 index 0000000000000..557431a75feda --- /dev/null +++ b/clang/test/SemaHIP/noconvergent-errors/backwards_jump.hip @@ -0,0 +1,23 @@ +// REQUIRES: amdgpu-registered-target +// RUN: not %clang_cc1 -fsyntax-only -triple amdgcn -fconvergence-control -Wno-unused-value %s -o - 2>&1 | FileCheck %s + +#define __device__ __attribute__((device)) + +__device__ void foo() __attribute__((convergent)); + +__device__ int simple_jump_backwards(int cond) { + int i = 0; + + // CHECK: :[[# @LINE+1]]:{{[0-9]+}}: error: cannot jump into a noconvergent statement from outside + while (true) [[clang::noconvergent]] { + foo(); + jumptarget: + i++; + } + + if (cond > 1) { + goto jumptarget; + } + + return i; +} diff --git a/clang/test/SemaHIP/noconvergent-errors/jump-into-nest.hip b/clang/test/SemaHIP/noconvergent-errors/jump-into-nest.hip new file mode 100644 index 0000000000000..8c76dc14fd5b4 --- /dev/null +++ b/clang/test/SemaHIP/noconvergent-errors/jump-into-nest.hip @@ -0,0 +1,32 @@ +// REQUIRES: amdgpu-registered-target +// RUN: not %clang_cc1 -fsyntax-only -triple amdgcn -fconvergence-control -Wno-unused-value %s -o - 2>&1 | FileCheck %s + +#define __device__ __attribute__((device)) + +__device__ void foo() __attribute__((convergent)); + +__device__ int jump_into_nest(int cond) { + int i = 0; + + if (cond > 1) { + goto jumptarget; + } + + foo(); + + while (true) { + foo(); + if (cond > 1) { + // CHECK: :[[# @LINE+1]]:{{[0-9]+}}: error: cannot jump into a noconvergent statement from outside + do [[clang::noconvergent]] { + i++; + foo(); + jumptarget: + i++; + } while (true); + foo(); + } + } + + return i; +} diff --git a/clang/test/SemaHIP/noconvergent-errors/no-errors.hip b/clang/test/SemaHIP/noconvergent-errors/no-errors.hip new file mode 100644 index 0000000000000..6280863658554 --- /dev/null +++ b/clang/test/SemaHIP/noconvergent-errors/no-errors.hip @@ -0,0 +1,83 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -fconvergence-control -Wno-unused-value %s -o - 2>&1 + +#define __device__ __attribute__((device)) + +__device__ void foo() __attribute__((convergent)); + +__device__ int annotated_target(int cond) { + int i = 0; + + if (cond > 1) { + goto jumptarget; + } + + while (true) { + foo(); + jumptarget: [[clang::noconvergent]] { + foo(); + i++; + } + } + + return i; +} + +__device__ int jump_across(int cond) { + int i = 0; + + if (cond > 1) { + goto jumptarget; + } + + while (true) [[clang::noconvergent]] { + foo(); + i++; + } + + jumptarget: + return i; +} + +__device__ int jump_internal(int cond) { + int i = 0; + + while (true) [[clang::noconvergent]] { + foo(); + jumptarget: + i++; + + if (cond > 1) { + goto jumptarget; + } + } + return i; +} + +__device__ void nest_to_nest(int cond) { + int i = 0; + + for (;;) { + foo(); + if (cond != 0) { + [[clang::noconvergent]] while (true) { + foo(); + if (cond > 1) { + goto jumptarget; + } + } + } + } + + while (true) { + foo(); + if (cond > 1) { + do { + foo(); + i++; + jumptarget: + i++; + } while (true); + } + } +} diff --git a/clang/test/SemaHIP/noconvergent-errors/simple_jump.hip b/clang/test/SemaHIP/noconvergent-errors/simple_jump.hip new file mode 100644 index 0000000000000..4321e99aa6467 --- /dev/null +++ b/clang/test/SemaHIP/noconvergent-errors/simple_jump.hip @@ -0,0 +1,23 @@ +// REQUIRES: amdgpu-registered-target +// RUN: not %clang_cc1 -fsyntax-only -triple amdgcn -fconvergence-control -Wno-unused-value %s -o - 2>&1 | FileCheck %s + +#define __device__ __attribute__((device)) + +__device__ void foo() __attribute__((convergent)); + +__device__ int simple_jump(int cond) { + int i = 0; + + if (cond > 1) { + goto jumptarget; + } + + // CHECK: :[[# @LINE+1]]:{{[0-9]+}}: error: cannot jump into a noconvergent statement from outside + while (true) [[clang::noconvergent]] { + foo(); + jumptarget: + i++; + } + + return i; +} diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 8e47e3c7b3a7c..616c374a07b90 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -45,6 +45,7 @@ class StringRef; class Type; class Value; class ConstantRange; +class ConvergenceControlInst; namespace Intrinsic { typedef unsigned ID; @@ -1180,12 +1181,7 @@ class CallBase : public Instruction { InsertPosition InsertPt = nullptr); /// Return the convergence control token for this call, if it exists. - Value *getConvergenceControlToken() const { - if (auto Bundle = getOperandBundle(llvm::LLVMContext::OB_convergencectrl)) { - return Bundle->Inputs[0].get(); - } - return nullptr; - } + ConvergenceControlInst *getConvergenceControlToken() const; static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Call || diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 93750d6e3845e..5cad494298b9b 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -1889,6 +1889,18 @@ class ConvergenceControlInst : public IntrinsicInst { ConvergenceControlInst *Parent); }; +/// Returns the first occurence of a ConvergenceControlInst in \p BB +inline ConvergenceControlInst *getConvergenceControlDef(BasicBlock &BB) { + for (auto &I : BB) { + if (auto *CI = dyn_cast(&I)) + return CI; + } + return nullptr; +} + +CallBase *setConvergenceControlToken(CallBase *CB, + ConvergenceControlInst *Token); + } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H diff --git a/llvm/include/llvm/Transforms/Utils/FixConvergenceControl.h b/llvm/include/llvm/Transforms/Utils/FixConvergenceControl.h new file mode 100644 index 0000000000000..7432692108af2 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/FixConvergenceControl.h @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A utility function to fix convergence control tokens in the presence of +// irreducible control flow. +// +//===----------------------------------------------------------------------===// + +namespace llvm { +class Function; + +// Detect and fix invalid convergence control tokens after the entire function +// is emitted in LLVM IR. +void fixConvergenceControl(llvm::Function *F); + +} // namespace llvm diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 18109bf107858..3fba7fa229c11 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -623,6 +623,13 @@ bool CallBase::hasClobberingOperandBundles() const { getIntrinsicID() != Intrinsic::assume; } +ConvergenceControlInst *CallBase::getConvergenceControlToken() const { + if (auto Bundle = getOperandBundle(llvm::LLVMContext::OB_convergencectrl)) { + return cast(Bundle->Inputs[0].get()); + } + return nullptr; +} + MemoryEffects CallBase::getMemoryEffects() const { MemoryEffects ME = getAttributes().getMemoryEffects(); if (auto *Fn = dyn_cast(getCalledOperand())) { diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 256bce1abe71f..42e9fcde9afb1 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -913,3 +913,24 @@ ConvergenceControlInst::CreateLoop(BasicBlock &BB, auto *Call = CallInst::Create(Fn, {}, {OB}, "", BB.getFirstInsertionPt()); return cast(Call); } + +CallBase *llvm::setConvergenceControlToken(CallBase *CB, + ConvergenceControlInst *Token) { + llvm::Value *bundleArgs[] = {Token}; + llvm::OperandBundleDef OB("convergencectrl", bundleArgs); + + SmallVector Bundles; + for (unsigned I = 0, E = CB->getNumOperandBundles(); I != E; ++I) { + auto Bundle = CB->getOperandBundleAt(I); + if (Bundle.getTagID() == LLVMContext::OB_convergencectrl) { + continue; + } + Bundles.emplace_back(Bundle); + } + Bundles.push_back(OB); + + CallBase *NewCB = CallBase::Create(CB, Bundles, CB->getIterator()); + CB->replaceAllUsesWith(NewCB); + CB->eraseFromParent(); + return NewCB; +} diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index 78cad0d253be8..fb69ffe8ee9c1 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_component_library(LLVMTransformUtils EntryExitInstrumenter.cpp EscapeEnumerator.cpp Evaluator.cpp + FixConvergenceControl.cpp FixIrreducible.cpp FlattenCFG.cpp FunctionComparator.cpp diff --git a/llvm/lib/Transforms/Utils/FixConvergenceControl.cpp b/llvm/lib/Transforms/Utils/FixConvergenceControl.cpp new file mode 100644 index 0000000000000..91a9f0bff86fe --- /dev/null +++ b/llvm/lib/Transforms/Utils/FixConvergenceControl.cpp @@ -0,0 +1,191 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A utility function to fix convergence control tokens in the presence of +// irreducible control flow. +// +// When generating convergence control tokens for iteration statements, CodeGen +// ignores the presence of any jumps that may cause the resulting CFG to be +// irreducible. CodeGen optimistically emits a `loop` intrinsic in the header of +// every iteration statement, which is invalid if control can jump into that +// statement. We fix this in the LLVM IR after the whole function is fully +// generated. For each irreducible cycle discovered in the CFG, we replace the +// `loop` intrinsic in the header with suitable invocations of `anchor` instead. +// +// Separately, a "backwards goto" can create a cycle that encapsulates an +// iteration statement. As a result, the `loop` intrinsic in that iteration +// statement no longer uses a token from its immediate parent cycle in the CFG. +// This is also fixed by replacing the `loop` intrinsic with an `anchor` +// intrinsic. +// +// The overall result is to produce in implementation-defined convergence as a +// result of unstructured jumps as defined in the Clang spec for convergence. +// +// ===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/FixConvergenceControl.h" +#include "llvm/IR/CycleInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include + +#define DEBUG_TYPE "convergence-fixup" + +using namespace llvm; + +static void +initializeTokenSources(SmallVectorImpl &Worklist, + Function *F) { + for (Instruction &II : instructions(F)) { + if (auto *CB = dyn_cast(&II)) { + if (CB->isEntry() || CB->isAnchor()) + Worklist.push_back(CB); + } + } +} + +namespace llvm { + +void fixConvergenceControl(Function *F) { + CycleInfo CI; + CI.compute(*F); + + // F->dump(); + // CI.dump(); + + enum DecisionTy { Delete, Replace }; + + // Can't use a DenseMap because we later insert while iterating. + std::map Decision; + SmallPtrSet NonIntrinsicUsers; + + SmallVector Worklist; + initializeTokenSources(Worklist, F); + + while (!Worklist.empty()) { + ConvergenceControlInst *CB = Worklist.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() << "Visiting: " << *CB << "\n"); + Cycle *CurrentCycle = CI.getCycle(CB->getParent()); + + for (Use &U : CB->uses()) { + auto *UserCB = cast(U.getUser()); + if (auto *C = dyn_cast(UserCB)) { + Worklist.push_back(C); + continue; + } + Cycle *UserCycle = CI.getCycle(UserCB->getParent()); + // A non-intrinsic user cannot use a token defined outside its own cycle. + if (UserCycle && !UserCycle->contains(CurrentCycle)) + NonIntrinsicUsers.insert(UserCB); + } + + if (!CurrentCycle) + continue; + + // A loop intrinsic is no longer useful in two cases: + // 1. Its cycle became irreducible, or, + // 2. The cycle appears `rotated` in the CFG and the call is not in the + // header. This happens when a loop statement is unreachable via + // sequential control flow, but is jumped into by a goto or switch. + // + // We will be visiting its users later. + if (!CurrentCycle->isReducible() || + CurrentCycle->getHeader() != CB->getParent()) { + LLVM_DEBUG(llvm::dbgs() << " Delete.\n"); + Decision[CB] = Delete; + continue; + } + + // A token use is valid only if the def is with the immediate parent. It's + // okay if the def is with a sibling, as long as the common parent is the + // immediate parent. + // + // The def can end up outside the parent when a goto forms a reducible cycle + // around a loop statement. Such a new reducible cycle does not itself have + // a heart. + ConvergenceControlInst *TokenUsed = CB->getConvergenceControlToken(); + Cycle *DefCycle = CI.getCycle(TokenUsed->getParent()); + assert(CurrentCycle == DefCycle || !CurrentCycle->contains(DefCycle)); + Cycle *Parent = CurrentCycle->getParentCycle(); + if (DefCycle != Parent && Parent && !Parent->contains(DefCycle)) { + // Don't overwrite if previous decision was to delete. + Decision.try_emplace(CB, Replace); + } + } + + SmallVector ToDelete; + // For deletion candidates, decide how to process each of the uses. + for (auto [CB, D] : Decision) { + if (D != Delete) + continue; + ToDelete.push_back(CB); + + for (Use &U : CB->uses()) { + auto *ConvOp = cast(U.getUser()); + // Users that are calls to the loop intrinsic can no longer use this as + // the parent token, so replace them with anchors. + if (auto *Child = dyn_cast(ConvOp)) { + // Don't overwrite if previous decision was to delete. Note that we are + // inserting while iterating over the std::map. It is possible that the + // newly inserted node is not visited, which is okay because we are only + // iterating over candidates mapped to ``Delete``. + Decision.try_emplace(Child, Replace); + continue; + } + // Other convergent users should be made non-converent. + NonIntrinsicUsers.insert(ConvOp); + } + } + + for (auto [CB, D] : Decision) { + if (D != Replace) + continue; + Cycle *CurrentCycle = CI.getCycle(CB->getParent()); + assert(CurrentCycle && CurrentCycle->isReducible()); + LLVM_DEBUG(llvm::dbgs() << " Replace with anchor: " << *CB << "\n"); + auto *Anchor = ConvergenceControlInst::CreateAnchor(*CB->getParent()); + CB->replaceAllUsesWith(Anchor); + CB->eraseFromParent(); + } + + // Make all non-intrinsic users non-convergent. It would have been convenient + // to just strip the token and the ``convergent`` attribute, but attributes + // get checked on the callee too if they don't exist on the call. We could + // have set the ``noconvergent`` attribute if it existed. For now, + // equivalently, we replace the token with an anchor. + for (CallBase *CB : NonIntrinsicUsers) { + auto *Token = ConvergenceControlInst::CreateAnchor(*CB->getParent()); + CB = setConvergenceControlToken(CB, Token); + LLVM_DEBUG(llvm::dbgs() << " Make non-convergent: " << *CB << "\n"); + } + + bool Changed = true; + while (Changed) { + Changed = false; + for (unsigned I = 0, E = ToDelete.size(); I != E; ++I) { + CallBase *CB = ToDelete[I]; + if (CB) { + LLVM_DEBUG(llvm::dbgs() << "Try delete:\n" << *CB << "\n"); + if (!CB->use_empty()) { + LLVM_DEBUG(llvm::dbgs() << " ... has pending use.\n"); + continue; + } + LLVM_DEBUG(llvm::dbgs() << " ... deleted.\n"); + CB->eraseFromParent(); + ToDelete[I] = nullptr; + Changed = true; + } + } + } + LLVM_DEBUG(for (CallBase *CB : ToDelete) assert(!CB);); + + // F->dump(); +} + +} // end namespace llvm